4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 * Copyright (C) 2012 - 2013 Mickael Raulet
6 * Copyright (C) 2012 - 2013 Gildas Cocherel
7 * Copyright (C) 2012 - 2013 Wassim Hamidouche
9 * This file is part of Libav.
11 * Libav is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * Libav is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with Libav; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/internal.h"
29 #include "libavutil/md5.h"
30 #include "libavutil/opt.h"
31 #include "libavutil/pixdesc.h"
32 #include "libavutil/stereo3d.h"
34 #include "bytestream.h"
35 #include "cabac_functions.h"
/* Extra reference samples needed around a block for quarter-pel luma
 * interpolation, indexed by the fractional-sample filter index (0..3):
 * rows/cols required before the block, after it, and the total. */
40 const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
41 const uint8_t ff_hevc_qpel_extra_after[4] = { 0, 3, 4, 4 };
42 const uint8_t ff_hevc_qpel_extra[4] = { 0, 6, 7, 6 };
/* Coefficient scan-order tables.
 * The *_x / *_y tables map a scan position to its (x, y) coordinate;
 * the *_inv tables map a (y, x) coordinate back to the scan position. */
44 static const uint8_t scan_1x1[1] = { 0 };
/* horizontal (raster) scan of the 2x2 grid of 4x4 coefficient groups */
46 static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
48 static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
/* horizontal scan inside a 4x4 coefficient group */
50 static const uint8_t horiz_scan4x4_x[16] = {
57 static const uint8_t horiz_scan4x4_y[16] = {
/* (y, x) -> scan position for the horizontal scan of an 8x8 block */
64 static const uint8_t horiz_scan8x8_inv[8][8] = {
65 { 0, 1, 2, 3, 16, 17, 18, 19, },
66 { 4, 5, 6, 7, 20, 21, 22, 23, },
67 { 8, 9, 10, 11, 24, 25, 26, 27, },
68 { 12, 13, 14, 15, 28, 29, 30, 31, },
69 { 32, 33, 34, 35, 48, 49, 50, 51, },
70 { 36, 37, 38, 39, 52, 53, 54, 55, },
71 { 40, 41, 42, 43, 56, 57, 58, 59, },
72 { 44, 45, 46, 47, 60, 61, 62, 63, },
/* diagonal (up-right) scan orders for 2x2 / 4x4 / 8x8 grids */
75 static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
77 static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
79 static const uint8_t diag_scan2x2_inv[2][2] = {
84 const uint8_t ff_hevc_diag_scan4x4_x[16] = {
91 const uint8_t ff_hevc_diag_scan4x4_y[16] = {
98 static const uint8_t diag_scan4x4_inv[4][4] = {
105 const uint8_t ff_hevc_diag_scan8x8_x[64] = {
124 const uint8_t ff_hevc_diag_scan8x8_y[64] = {
/* (y, x) -> scan position for the diagonal scan of an 8x8 block */
143 static const uint8_t diag_scan8x8_inv[8][8] = {
144 { 0, 2, 5, 9, 14, 20, 27, 35, },
145 { 1, 4, 8, 13, 19, 26, 34, 42, },
146 { 3, 7, 12, 18, 25, 33, 41, 48, },
147 { 6, 11, 17, 24, 32, 40, 47, 53, },
148 { 10, 16, 23, 31, 39, 46, 52, 57, },
149 { 15, 22, 30, 38, 45, 51, 56, 60, },
150 { 21, 29, 37, 44, 50, 55, 59, 62, },
151 { 28, 36, 43, 49, 54, 58, 61, 63, },
155 * NOTE: Each function hls_foo corresponds to the function foo in the
156 * specification (HLS stands for High Level Syntax).
163 /* free everything allocated by pic_arrays_init() */
164 static void pic_arrays_free(HEVCContext *s)
167     av_freep(&s->deblock);
168     av_freep(&s->split_cu_flag);
170     av_freep(&s->skip_flag);
171     av_freep(&s->tab_ct_depth);
173     av_freep(&s->tab_ipm);
174     av_freep(&s->cbf_luma);
175     av_freep(&s->is_pcm);
177     av_freep(&s->qp_y_tab);
178     av_freep(&s->tab_slice_address);
179     av_freep(&s->filter_slice_edges);
/* deblocking-filter boundary-strength maps */
181     av_freep(&s->horizontal_bs);
182     av_freep(&s->vertical_bs);
/* per-frame MvField / RefPicListTab buffer pools */
184     av_buffer_pool_uninit(&s->tab_mvf_pool);
185     av_buffer_pool_uninit(&s->rpl_tab_pool);
188 /* allocate arrays that depend on frame dimensions */
189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
191     int log2_min_cb_size = sps->log2_min_cb_size;
192     int width = sps->width;
193     int height = sps->height;
194     int pic_size = width * height;
195     int pic_size_in_ctb = ((width >> log2_min_cb_size) + 1) *
196                           ((height >> log2_min_cb_size) + 1);
197     int ctb_count = sps->ctb_width * sps->ctb_height;
198     int min_pu_size = sps->min_pu_width * sps->min_pu_height;
/* boundary-strength grid works at 8x8-pixel granularity */
200     s->bs_width = width >> 3;
201     s->bs_height = height >> 3;
203     s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao));
204     s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock));
205     s->split_cu_flag = av_malloc(pic_size);
206     if (!s->sao || !s->deblock || !s->split_cu_flag)
209     s->skip_flag = av_malloc(pic_size_in_ctb);
210     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
211     if (!s->skip_flag || !s->tab_ct_depth)
214     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
215     s->tab_ipm = av_malloc(min_pu_size);
216     s->is_pcm = av_malloc(min_pu_size);
217     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
220     s->filter_slice_edges = av_malloc(ctb_count);
221     s->tab_slice_address = av_malloc(pic_size_in_ctb *
222                                       sizeof(*s->tab_slice_address));
223     s->qp_y_tab = av_malloc(pic_size_in_ctb *
224                             sizeof(*s->qp_y_tab));
225     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
/* zero-initialized boundary-strength arrays, one extra row/column */
228     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
229     s->vertical_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
230     if (!s->horizontal_bs || !s->vertical_bs)
/* pools so per-frame motion / RPL tables can be recycled between frames */
233     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
235     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
237     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
/* allocation-failure path: everything freed by pic_arrays_free() */
244     return AVERROR(ENOMEM);
/* Parse the weighted-prediction table from the slice header:
 * luma/chroma weights and offsets for list L0, and for L1 on B slices.
 * Entries whose per-ref flag is 0 get the default weight (1 << denom)
 * and offset 0. */
247 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
251     uint8_t luma_weight_l0_flag[16];
252     uint8_t chroma_weight_l0_flag[16];
253     uint8_t luma_weight_l1_flag[16];
254     uint8_t chroma_weight_l1_flag[16];
256     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
257     if (s->sps->chroma_format_idc != 0) {
258         int delta = get_se_golomb(gb);
259         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
/* L0 luma weight flags; missing weights take the defaults */
262     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
263         luma_weight_l0_flag[i] = get_bits1(gb);
264         if (!luma_weight_l0_flag[i]) {
265             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
266             s->sh.luma_offset_l0[i] = 0;
269     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
270         for (i = 0; i < s->sh.nb_refs[L0]; i++)
271             chroma_weight_l0_flag[i] = get_bits1(gb);
273         for (i = 0; i < s->sh.nb_refs[L0]; i++)
274             chroma_weight_l0_flag[i] = 0;
276     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
277         if (luma_weight_l0_flag[i]) {
278             int delta_luma_weight_l0 = get_se_golomb(gb);
279             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
280             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
282         if (chroma_weight_l0_flag[i]) {
283             for (j = 0; j < 2; j++) {
284                 int delta_chroma_weight_l0 = get_se_golomb(gb);
285                 int delta_chroma_offset_l0 = get_se_golomb(gb);
286                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
287                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
288                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
291             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
292             s->sh.chroma_offset_l0[i][0] = 0;
293             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
294             s->sh.chroma_offset_l0[i][1] = 0;
/* same parsing repeated for reference list L1 on B slices */
297     if (s->sh.slice_type == B_SLICE) {
298         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
299             luma_weight_l1_flag[i] = get_bits1(gb);
300             if (!luma_weight_l1_flag[i]) {
301                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
302                 s->sh.luma_offset_l1[i] = 0;
305         if (s->sps->chroma_format_idc != 0) {
306             for (i = 0; i < s->sh.nb_refs[L1]; i++)
307                 chroma_weight_l1_flag[i] = get_bits1(gb);
309             for (i = 0; i < s->sh.nb_refs[L1]; i++)
310                 chroma_weight_l1_flag[i] = 0;
312         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
313             if (luma_weight_l1_flag[i]) {
314                 int delta_luma_weight_l1 = get_se_golomb(gb);
315                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
316                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
318             if (chroma_weight_l1_flag[i]) {
319                 for (j = 0; j < 2; j++) {
320                     int delta_chroma_weight_l1 = get_se_golomb(gb);
321                     int delta_chroma_offset_l1 = get_se_golomb(gb);
322                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
323                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
324                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
327                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
328                 s->sh.chroma_offset_l1[i][0] = 0;
329                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
330                 s->sh.chroma_offset_l1[i][1] = 0;
/* Decode the long-term reference picture set from the slice header.
 * The first nb_sps entries are taken from the SPS-signalled candidates,
 * the remaining nb_sh entries are coded explicitly in the slice header.
 * Returns 0 on success or AVERROR_INVALIDDATA if too many refs are coded. */
336 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
338     const HEVCSPS *sps = s->sps;
339     int max_poc_lsb = 1 << sps->log2_max_poc_lsb;
340     int prev_delta_msb = 0;
341     int nb_sps = 0, nb_sh;
345     if (!sps->long_term_ref_pics_present_flag)
348     if (sps->num_long_term_ref_pics_sps > 0)
349         nb_sps = get_ue_golomb_long(gb);
350     nb_sh = get_ue_golomb_long(gb);
352     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
353         return AVERROR_INVALIDDATA;
355     rps->nb_refs = nb_sh + nb_sps;
357     for (i = 0; i < rps->nb_refs; i++) {
358         uint8_t delta_poc_msb_present;
361         uint8_t lt_idx_sps = 0;
363         if (sps->num_long_term_ref_pics_sps > 1)
364             lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
366         rps->poc[i] = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
367         rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
369         rps->poc[i] = get_bits(gb, sps->log2_max_poc_lsb);
370         rps->used[i] = get_bits1(gb);
/* optionally extend the POC LSB with a delta of the MSB cycle;
 * deltas are coded differentially within each of the two entry groups */
373         delta_poc_msb_present = get_bits1(gb);
374         if (delta_poc_msb_present) {
375             int delta = get_ue_golomb_long(gb);
377             if (i && i != nb_sps)
378                 delta += prev_delta_msb;
380             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
381             prev_delta_msb = delta;
/* Activate a new SPS: (re)allocate the picture arrays and export the
 * stream properties (dimensions, pixel format, color info, timing) to
 * the AVCodecContext. Returns 0 on success or a negative AVERROR code. */
388 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
391     int num = 0, den = 0;
394     ret = pic_arrays_init(s, sps);
398     s->avctx->coded_width = sps->width;
399     s->avctx->coded_height = sps->height;
400     s->avctx->width = sps->output_width;
401     s->avctx->height = sps->output_height;
402     s->avctx->pix_fmt = sps->pix_fmt;
403     s->avctx->sample_aspect_ratio = sps->vui.sar;
404     s->avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
406     if (sps->vui.video_signal_type_present_flag)
407         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
410         s->avctx->color_range = AVCOL_RANGE_MPEG;
412     if (sps->vui.colour_description_present_flag) {
413         s->avctx->color_primaries = sps->vui.colour_primaries;
414         s->avctx->color_trc = sps->vui.transfer_characteristic;
415         s->avctx->colorspace = sps->vui.matrix_coeffs;
417         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
418         s->avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
419         s->avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
/* re-init bit-depth-dependent DSP/prediction function tables */
422     ff_hevc_pred_init(&s->hpc, sps->bit_depth);
423     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
424     ff_videodsp_init (&s->vdsp, sps->bit_depth);
/* with SAO enabled, decoding goes through a separate temporary frame */
426     if (sps->sao_enabled) {
427         av_frame_unref(s->tmp_frame);
428         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
431         s->frame = s->tmp_frame;
435     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
/* VPS timing info takes precedence over the VUI timing info */
437     if (s->vps->vps_timing_info_present_flag) {
438         num = s->vps->vps_num_units_in_tick;
439         den = s->vps->vps_time_scale;
440     } else if (sps->vui.vui_timing_info_present_flag) {
441         num = sps->vui.vui_num_units_in_tick;
442         den = sps->vui.vui_time_scale;
445     if (num != 0 && den != 0)
446         av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
/* Parse a slice segment header and activate the PPS/SPS it references.
 * Fills s->sh and related decoder state. Returns 0 on success or a
 * negative AVERROR code on invalid data. */
457 static int hls_slice_header(HEVCContext *s)
459     GetBitContext *gb = &s->HEVClc.gb;
460     SliceHeader *sh = &s->sh;
464     sh->first_slice_in_pic_flag = get_bits1(gb);
465     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
466         s->seq_decode = (s->seq_decode + 1) & 0xff;
469         ff_hevc_clear_refs(s);
471     if (s->nal_unit_type >= 16 && s->nal_unit_type <= 23)
472         sh->no_output_of_prior_pics_flag = get_bits1(gb);
/* select the active parameter sets; an SPS switch re-inits everything */
474     sh->pps_id = get_ue_golomb_long(gb);
475     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
476         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
477         return AVERROR_INVALIDDATA;
479     if (!sh->first_slice_in_pic_flag &&
480         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
481         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
482         return AVERROR_INVALIDDATA;
484     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
486     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
487         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
489         ff_hevc_clear_refs(s);
490         ret = set_sps(s, s->sps);
494         s->seq_decode = (s->seq_decode + 1) & 0xff;
498     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
499     s->avctx->level = s->sps->ptl.general_ptl.level_idc;
501     sh->dependent_slice_segment_flag = 0;
502     if (!sh->first_slice_in_pic_flag) {
503         int slice_address_length;
505         if (s->pps->dependent_slice_segments_enabled_flag)
506             sh->dependent_slice_segment_flag = get_bits1(gb);
508         slice_address_length = av_ceil_log2(s->sps->ctb_width *
510         sh->slice_segment_addr = get_bits(gb, slice_address_length);
511         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
512             av_log(s->avctx, AV_LOG_ERROR,
513                    "Invalid slice segment address: %u.\n",
514                    sh->slice_segment_addr);
515             return AVERROR_INVALIDDATA;
518         if (!sh->dependent_slice_segment_flag) {
519             sh->slice_addr = sh->slice_segment_addr;
523         sh->slice_segment_addr = sh->slice_addr = 0;
525         s->slice_initialized = 0;
/* an independent slice segment carries the full header */
528     if (!sh->dependent_slice_segment_flag) {
529         s->slice_initialized = 0;
531         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
532             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
534         sh->slice_type = get_ue_golomb_long(gb);
535         if (!(sh->slice_type == I_SLICE ||
536               sh->slice_type == P_SLICE ||
537               sh->slice_type == B_SLICE)) {
538             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
540             return AVERROR_INVALIDDATA;
542         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
543             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
544             return AVERROR_INVALIDDATA;
547         if (s->pps->output_flag_present_flag)
548             sh->pic_output_flag = get_bits1(gb);
550         if (s->sps->separate_colour_plane_flag)
551             sh->colour_plane_id = get_bits(gb, 2);
/* POC and reference picture sets (not present for IDR) */
554             int short_term_ref_pic_set_sps_flag, poc;
556             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
557             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
558             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
559                 av_log(s->avctx, AV_LOG_WARNING,
560                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
561                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
562                     return AVERROR_INVALIDDATA;
/* short-term RPS: either coded in the slice header or an SPS index */
567             short_term_ref_pic_set_sps_flag = get_bits1(gb);
568             if (!short_term_ref_pic_set_sps_flag) {
569                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
573                 sh->short_term_rps = &sh->slice_rps;
575                 int numbits, rps_idx;
577                 if (!s->sps->nb_st_rps) {
578                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
579                     return AVERROR_INVALIDDATA;
582                 numbits = av_ceil_log2(s->sps->nb_st_rps);
583                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
584                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
587             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
589                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
590                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
591                     return AVERROR_INVALIDDATA;
594             if (s->sps->sps_temporal_mvp_enabled_flag)
595                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
597                 sh->slice_temporal_mvp_enabled_flag = 0;
599             s->sh.short_term_rps = NULL;
604         if (s->temporal_id == 0 &&
605             s->nal_unit_type != NAL_TRAIL_N &&
606             s->nal_unit_type != NAL_TSA_N &&
607             s->nal_unit_type != NAL_STSA_N &&
608             s->nal_unit_type != NAL_RADL_N &&
609             s->nal_unit_type != NAL_RADL_R &&
610             s->nal_unit_type != NAL_RASL_N &&
611             s->nal_unit_type != NAL_RASL_R)
/* per-slice SAO enables: one flag for luma, one shared by both chroma planes */
614         if (s->sps->sao_enabled) {
615             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
616             sh->slice_sample_adaptive_offset_flag[1] =
617             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
619             sh->slice_sample_adaptive_offset_flag[0] = 0;
620             sh->slice_sample_adaptive_offset_flag[1] = 0;
621             sh->slice_sample_adaptive_offset_flag[2] = 0;
/* reference list sizes: PPS defaults, optionally overridden per slice */
624         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
625         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
628             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
629             if (sh->slice_type == B_SLICE)
630                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
632             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
633                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
634                 if (sh->slice_type == B_SLICE)
635                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
637             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
638                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
639                        sh->nb_refs[L0], sh->nb_refs[L1]);
640                 return AVERROR_INVALIDDATA;
643             sh->rpl_modification_flag[0] = 0;
644             sh->rpl_modification_flag[1] = 0;
645             nb_refs = ff_hevc_frame_nb_refs(s);
647                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
648                 return AVERROR_INVALIDDATA;
/* explicit reference-picture-list reordering */
651             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
652                 sh->rpl_modification_flag[0] = get_bits1(gb);
653                 if (sh->rpl_modification_flag[0]) {
654                     for (i = 0; i < sh->nb_refs[L0]; i++)
655                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
658                 if (sh->slice_type == B_SLICE) {
659                     sh->rpl_modification_flag[1] = get_bits1(gb);
660                     if (sh->rpl_modification_flag[1] == 1)
661                         for (i = 0; i < sh->nb_refs[L1]; i++)
662                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
666             if (sh->slice_type == B_SLICE)
667                 sh->mvd_l1_zero_flag = get_bits1(gb);
669             if (s->pps->cabac_init_present_flag)
670                 sh->cabac_init_flag = get_bits1(gb);
672                 sh->cabac_init_flag = 0;
/* collocated picture for temporal MV prediction */
674             sh->collocated_ref_idx = 0;
675             if (sh->slice_temporal_mvp_enabled_flag) {
676                 sh->collocated_list = L0;
677                 if (sh->slice_type == B_SLICE)
678                     sh->collocated_list = !get_bits1(gb);
680                 if (sh->nb_refs[sh->collocated_list] > 1) {
681                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
682                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
683                         av_log(s->avctx, AV_LOG_ERROR,
684                                "Invalid collocated_ref_idx: %d.\n",
685                                sh->collocated_ref_idx);
686                         return AVERROR_INVALIDDATA;
691             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
692                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
693                 pred_weight_table(s, gb);
/* five_minus_max_num_merge_cand: valid range of the result is 1..5 */
696             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
697             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
698                 av_log(s->avctx, AV_LOG_ERROR,
699                        "Invalid number of merging MVP candidates: %d.\n",
700                        sh->max_num_merge_cand);
701                 return AVERROR_INVALIDDATA;
/* slice-level QP delta and chroma QP offsets */
705         sh->slice_qp_delta = get_se_golomb(gb);
706         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
707             sh->slice_cb_qp_offset = get_se_golomb(gb);
708             sh->slice_cr_qp_offset = get_se_golomb(gb);
710             sh->slice_cb_qp_offset = 0;
711             sh->slice_cr_qp_offset = 0;
/* deblocking parameters: slice-level override, otherwise PPS values */
714         if (s->pps->deblocking_filter_control_present_flag) {
715             int deblocking_filter_override_flag = 0;
717             if (s->pps->deblocking_filter_override_enabled_flag)
718                 deblocking_filter_override_flag = get_bits1(gb);
720             if (deblocking_filter_override_flag) {
721                 sh->disable_deblocking_filter_flag = get_bits1(gb);
722                 if (!sh->disable_deblocking_filter_flag) {
723                     sh->beta_offset = get_se_golomb(gb) * 2;
724                     sh->tc_offset = get_se_golomb(gb) * 2;
727                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
728                 sh->beta_offset = s->pps->beta_offset;
729                 sh->tc_offset = s->pps->tc_offset;
732             sh->disable_deblocking_filter_flag = 0;
737         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
738             (sh->slice_sample_adaptive_offset_flag[0] ||
739              sh->slice_sample_adaptive_offset_flag[1] ||
740              !sh->disable_deblocking_filter_flag)) {
741             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
743             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
745     } else if (!s->slice_initialized) {
/* a dependent slice segment with no preceding independent one is invalid */
746         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
747         return AVERROR_INVALIDDATA;
/* entry points for tiles / WPP; the offsets themselves are skipped here */
750     sh->num_entry_point_offsets = 0;
751     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
752         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
753         if (sh->num_entry_point_offsets > 0) {
754             int offset_len = get_ue_golomb_long(gb) + 1;
756             for (i = 0; i < sh->num_entry_point_offsets; i++)
757                 skip_bits(gb, offset_len);
761     if (s->pps->slice_header_extension_present_flag) {
762         int length = get_ue_golomb_long(gb);
763         for (i = 0; i < length; i++)
764             skip_bits(gb, 8);  // slice_header_extension_data_byte
767     // Inferred parameters
768     sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
769     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
771     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
773     if (!s->pps->cu_qp_delta_enabled_flag)
774         s->HEVClc.qp_y = ((s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset) %
775                           (52 + s->sps->qp_bd_offset)) - s->sps->qp_bd_offset;
777     s->slice_initialized = 1;
/* Access a per-CTB table entry at CTB coordinates (x, y). */
782 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
/* Set one SAO parameter: decode it when no merge flag is set, otherwise
 * inherit the value from the left or above CTB. Uses the local variables
 * sao_merge_left_flag / sao_merge_up_flag / rx / ry of hls_sao_param(). */
784 #define SET_SAO(elem, value) \
786     if (!sao_merge_up_flag && !sao_merge_left_flag) \
788     else if (sao_merge_left_flag) \
789         sao->elem = CTB(s->sao, rx-1, ry).elem; \
790     else if (sao_merge_up_flag) \
791         sao->elem = CTB(s->sao, rx, ry-1).elem; \
/* Decode the SAO parameters for the CTB at (rx, ry), or inherit them
 * from the left/above CTB when the CABAC merge flags are set. */
796 static void hls_sao_param(HEVCContext *s, int rx, int ry)
798     HEVCLocalContext *lc = &s->HEVClc;
799     int sao_merge_left_flag = 0;
800     int sao_merge_up_flag = 0;
/* offsets are left-shifted for bit depths above 10 */
801     int shift = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
802     SAOParams *sao = &CTB(s->sao, rx, ry);
805     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
806         s->sh.slice_sample_adaptive_offset_flag[1]) {
/* merge flags: copy every SAO parameter from a neighbouring CTB */
808         if (lc->ctb_left_flag)
809             sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
811         if (ry > 0 && !sao_merge_left_flag) {
813             sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
817     for (c_idx = 0; c_idx < 3; c_idx++) {
818         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
819             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
/* Cr (c_idx 2) reuses the type and EO class decoded for Cb */
824             sao->type_idx[2] = sao->type_idx[1];
825             sao->eo_class[2] = sao->eo_class[1];
827         SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
830         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
833         for (i = 0; i < 4; i++)
834             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
/* band offsets carry explicit signs; edge offsets get implicit signs */
836         if (sao->type_idx[c_idx] == SAO_BAND) {
837             for (i = 0; i < 4; i++) {
838                 if (sao->offset_abs[c_idx][i]) {
839                     SET_SAO(offset_sign[c_idx][i],
840                             ff_hevc_sao_offset_sign_decode(s));
842                     sao->offset_sign[c_idx][i] = 0;
845             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
846         } else if (c_idx != 2) {
847             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
850         // Inferred parameters
851         sao->offset_val[c_idx][0] = 0;
852         for (i = 0; i < 4; i++) {
853             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
854             if (sao->type_idx[c_idx] == SAO_EDGE) {
856                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
857             } else if (sao->offset_sign[c_idx][i]) {
858                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
867 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
868 int log2_trafo_size, enum ScanType scan_idx,
871 #define GET_COORD(offset, n) \
873 x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n]; \
874 y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n]; \
876 HEVCLocalContext *lc = &s->HEVClc;
877 int transform_skip_flag = 0;
879 int last_significant_coeff_x, last_significant_coeff_y;
883 int greater1_ctx = 1;
886 int x_cg_last_sig, y_cg_last_sig;
888 const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
890 ptrdiff_t stride = s->frame->linesize[c_idx];
891 int hshift = s->sps->hshift[c_idx];
892 int vshift = s->sps->vshift[c_idx];
893 uint8_t *dst = &s->frame->data[c_idx][(y0 >> vshift) * stride +
894 ((x0 >> hshift) << s->sps->pixel_shift)];
895 DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
896 DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
898 int trafo_size = 1 << log2_trafo_size;
899 int i, qp, shift, add, scale, scale_m;
900 const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
901 const uint8_t *scale_matrix;
904 // Derive QP for dequant
905 if (!lc->cu.cu_transquant_bypass_flag) {
906 static const int qp_c[] = {
907 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
910 static const uint8_t rem6[51 + 2 * 6 + 1] = {
911 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
912 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
913 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
916 static const uint8_t div6[51 + 2 * 6 + 1] = {
917 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3,
918 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6,
919 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
924 qp = qp_y + s->sps->qp_bd_offset;
929 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
931 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
933 qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
939 qp = qp_c[qp_i - 30];
941 qp += s->sps->qp_bd_offset;
944 shift = s->sps->bit_depth + log2_trafo_size - 5;
945 add = 1 << (shift - 1);
946 scale = level_scale[rem6[qp]] << (div6[qp]);
947 scale_m = 16; // default when no custom scaling lists.
950 if (s->sps->scaling_list_enable_flag) {
951 const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
952 &s->pps->scaling_list : &s->sps->scaling_list;
953 int matrix_id = lc->cu.pred_mode != MODE_INTRA;
955 if (log2_trafo_size != 5)
956 matrix_id = 3 * matrix_id + c_idx;
958 scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
959 if (log2_trafo_size >= 4)
960 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
964 if (s->pps->transform_skip_enabled_flag &&
965 !lc->cu.cu_transquant_bypass_flag &&
966 log2_trafo_size == 2) {
967 transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
970 last_significant_coeff_x =
971 ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
972 last_significant_coeff_y =
973 ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
975 if (last_significant_coeff_x > 3) {
976 int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
977 last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
978 (2 + (last_significant_coeff_x & 1)) +
982 if (last_significant_coeff_y > 3) {
983 int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
984 last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
985 (2 + (last_significant_coeff_y & 1)) +
989 if (scan_idx == SCAN_VERT)
990 FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
992 x_cg_last_sig = last_significant_coeff_x >> 2;
993 y_cg_last_sig = last_significant_coeff_y >> 2;
997 int last_x_c = last_significant_coeff_x & 3;
998 int last_y_c = last_significant_coeff_y & 3;
1000 scan_x_off = ff_hevc_diag_scan4x4_x;
1001 scan_y_off = ff_hevc_diag_scan4x4_y;
1002 num_coeff = diag_scan4x4_inv[last_y_c][last_x_c];
1003 if (trafo_size == 4) {
1004 scan_x_cg = scan_1x1;
1005 scan_y_cg = scan_1x1;
1006 } else if (trafo_size == 8) {
1007 num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1008 scan_x_cg = diag_scan2x2_x;
1009 scan_y_cg = diag_scan2x2_y;
1010 } else if (trafo_size == 16) {
1011 num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1012 scan_x_cg = ff_hevc_diag_scan4x4_x;
1013 scan_y_cg = ff_hevc_diag_scan4x4_y;
1014 } else { // trafo_size == 32
1015 num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1016 scan_x_cg = ff_hevc_diag_scan8x8_x;
1017 scan_y_cg = ff_hevc_diag_scan8x8_y;
1022 scan_x_cg = horiz_scan2x2_x;
1023 scan_y_cg = horiz_scan2x2_y;
1024 scan_x_off = horiz_scan4x4_x;
1025 scan_y_off = horiz_scan4x4_y;
1026 num_coeff = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1028 default: //SCAN_VERT
1029 scan_x_cg = horiz_scan2x2_y;
1030 scan_y_cg = horiz_scan2x2_x;
1031 scan_x_off = horiz_scan4x4_y;
1032 scan_y_off = horiz_scan4x4_x;
1033 num_coeff = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1037 num_last_subset = (num_coeff - 1) >> 4;
1039 for (i = num_last_subset; i >= 0; i--) {
1041 int x_cg, y_cg, x_c, y_c;
1042 int implicit_non_zero_coeff = 0;
1043 int64_t trans_coeff_level;
1045 int offset = i << 4;
1047 uint8_t significant_coeff_flag_idx[16];
1048 uint8_t nb_significant_coeff_flag = 0;
1050 x_cg = scan_x_cg[i];
1051 y_cg = scan_y_cg[i];
1053 if (i < num_last_subset && i > 0) {
1055 if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1056 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1057 if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1058 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1060 significant_coeff_group_flag[x_cg][y_cg] =
1061 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1062 implicit_non_zero_coeff = 1;
1064 significant_coeff_group_flag[x_cg][y_cg] =
1065 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1066 (x_cg == 0 && y_cg == 0));
1069 last_scan_pos = num_coeff - offset - 1;
1071 if (i == num_last_subset) {
1072 n_end = last_scan_pos - 1;
1073 significant_coeff_flag_idx[0] = last_scan_pos;
1074 nb_significant_coeff_flag = 1;
1079 if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1080 prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1081 if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1082 prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1084 for (n = n_end; n >= 0; n--) {
1085 GET_COORD(offset, n);
1087 if (significant_coeff_group_flag[x_cg][y_cg] &&
1088 (n > 0 || implicit_non_zero_coeff == 0)) {
1089 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1093 significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1094 nb_significant_coeff_flag++;
1095 implicit_non_zero_coeff = 0;
1098 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1099 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1100 significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1101 nb_significant_coeff_flag++;
1106 n_end = nb_significant_coeff_flag;
1109 int first_nz_pos_in_cg = 16;
1110 int last_nz_pos_in_cg = -1;
1111 int c_rice_param = 0;
1112 int first_greater1_coeff_idx = -1;
1113 uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1114 uint16_t coeff_sign_flag;
1116 int sign_hidden = 0;
1118 // initialize first elem of coeff_bas_level_greater1_flag
1119 int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1121 if (!(i == num_last_subset) && greater1_ctx == 0)
1124 last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1126 for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1127 int n_idx = significant_coeff_flag_idx[m];
1128 int inc = (ctx_set << 2) + greater1_ctx;
1129 coeff_abs_level_greater1_flag[n_idx] =
1130 ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1131 if (coeff_abs_level_greater1_flag[n_idx]) {
1133 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1137 if (coeff_abs_level_greater1_flag[n_idx] &&
1138 first_greater1_coeff_idx == -1)
1139 first_greater1_coeff_idx = n_idx;
1141 first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1142 sign_hidden = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1143 !lc->cu.cu_transquant_bypass_flag;
1145 if (first_greater1_coeff_idx != -1) {
1146 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1148 if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1149 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1151 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1154 for (m = 0; m < n_end; m++) {
1155 n = significant_coeff_flag_idx[m];
1156 GET_COORD(offset, n);
1157 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1158 if (trans_coeff_level == ((m < 8) ?
1159 ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1160 int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1162 trans_coeff_level += last_coeff_abs_level_remaining;
1163 if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1164 c_rice_param = FFMIN(c_rice_param + 1, 4);
1166 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1167 sum_abs += trans_coeff_level;
1168 if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1169 trans_coeff_level = -trans_coeff_level;
1171 if (coeff_sign_flag >> 15)
1172 trans_coeff_level = -trans_coeff_level;
1173 coeff_sign_flag <<= 1;
1174 if (!lc->cu.cu_transquant_bypass_flag) {
1175 if (s->sps->scaling_list_enable_flag) {
1176 if (y_c || x_c || log2_trafo_size < 4) {
1178 switch (log2_trafo_size) {
1179 case 3: pos = (y_c << 3) + x_c; break;
1180 case 4: pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1181 case 5: pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1182 default: pos = (y_c << 2) + x_c;
1184 scale_m = scale_matrix[pos];
1189 trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1190 if(trans_coeff_level < 0) {
1191 if((~trans_coeff_level) & 0xFffffffffff8000)
1192 trans_coeff_level = -32768;
1194 if (trans_coeff_level & 0xffffffffffff8000)
1195 trans_coeff_level = 32767;
1198 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1203 if (lc->cu.cu_transquant_bypass_flag) {
1204 s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1206 if (transform_skip_flag)
1207 s->hevcdsp.transform_skip(dst, coeffs, stride);
1208 else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1209 log2_trafo_size == 2)
1210 s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1212 s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
/*
 * Decode one transform unit (TU).
 * For intra CUs, first runs intra prediction for the luma block and the
 * corresponding chroma blocks; then, if any coded-block flag is set,
 * parses the optional CU-level QP delta, derives the coefficient scan
 * order from the intra prediction mode, and decodes the residuals.
 * (xBase, yBase) address the parent block for the 4x4-chroma special
 * case; (cb_xBase, cb_yBase) address the coding block for QP handling.
 */
1216 static void hls_transform_unit(HEVCContext *s, int x0, int y0,
1217 int xBase, int yBase, int cb_xBase, int cb_yBase,
1218 int log2_cb_size, int log2_trafo_size,
1219 int trafo_depth, int blk_idx)
1221 HEVCLocalContext *lc = &s->HEVClc;
/* Intra-predict the TU area before the residual is added. */
1223 if (lc->cu.pred_mode == MODE_INTRA) {
1224 int trafo_size = 1 << log2_trafo_size;
1225 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1227 s->hpc.intra_pred(s, x0, y0, log2_trafo_size, 0);
1228 if (log2_trafo_size > 2) {
/* Chroma TU is half-size; hshift[1] accounts for chroma subsampling. */
1229 trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1230 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1231 s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 1);
1232 s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 2);
/* 4x4 luma: chroma is predicted once, for the whole parent block,
 * when the last (fourth) luma sub-block is reached. */
1233 } else if (blk_idx == 3) {
1234 trafo_size = trafo_size << s->sps->hshift[1];
1235 ff_hevc_set_neighbour_available(s, xBase, yBase,
1236 trafo_size, trafo_size);
1237 s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 1);
1238 s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 2);
/* Residual data present for at least one component? */
1242 if (lc->tt.cbf_luma ||
1243 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1244 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1245 int scan_idx = SCAN_DIAG;
1246 int scan_idx_c = SCAN_DIAG;
/* cu_qp_delta is coded at most once per quantization group. */
1248 if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1249 lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1250 if (lc->tu.cu_qp_delta != 0)
1251 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1252 lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1253 lc->tu.is_cu_qp_delta_coded = 1;
1254 ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
/* Scan order is mode-dependent for small intra TUs: near-horizontal
 * prediction modes (6..14) use a vertical scan, near-vertical modes
 * (22..30) a horizontal scan; everything else stays diagonal. */
1257 if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1258 if (lc->tu.cur_intra_pred_mode >= 6 &&
1259 lc->tu.cur_intra_pred_mode <= 14) {
1260 scan_idx = SCAN_VERT;
1261 } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1262 lc->tu.cur_intra_pred_mode <= 30) {
1263 scan_idx = SCAN_HORIZ;
1266 if (lc->pu.intra_pred_mode_c >= 6 &&
1267 lc->pu.intra_pred_mode_c <= 14) {
1268 scan_idx_c = SCAN_VERT;
1269 } else if (lc->pu.intra_pred_mode_c >= 22 &&
1270 lc->pu.intra_pred_mode_c <= 30) {
1271 scan_idx_c = SCAN_HORIZ;
/* Decode the residuals whose CBFs are set; the blk_idx == 3 branch is
 * the 4x4-luma case where chroma residuals belong to the parent block. */
1275 if (lc->tt.cbf_luma)
1276 hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1277 if (log2_trafo_size > 2) {
1278 if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0))
1279 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1280 if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0))
1281 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1282 } else if (blk_idx == 3) {
1283 if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase))
1284 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1285 if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase))
1286 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
/*
 * Mark every min-PU cell covered by the coding block at (x0, y0) in the
 * is_pcm map, clipping the range to the picture borders.
 * NOTE(review): the value 2 apparently distinguishes transquant-bypass
 * blocks from real PCM blocks for the deblocking filter — confirm against
 * the is_pcm consumers.
 */
1291 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1293 int cb_size = 1 << log2_cb_size;
1294 int log2_min_pu_size = s->sps->log2_min_pu_size;
1296 int min_pu_width = s->sps->min_pu_width;
/* Clip to the picture so partially-outside CBs do not overrun the map. */
1297 int x_end = FFMIN(x0 + cb_size, s->sps->width);
1298 int y_end = FFMIN(y0 + cb_size, s->sps->height);
1301 for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1302 for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1303 s->is_pcm[i + j * min_pu_width] = 2;
/*
 * Recursively parse the transform tree rooted at (x0, y0).
 * Decides whether the current node is split into four children, decodes
 * the chroma coded-block flags at each level, and at the leaves decodes
 * the luma CBF and calls hls_transform_unit(). Also records cbf_luma in
 * the per-min-TU map and updates deblocking boundary strengths.
 */
1306 static void hls_transform_tree(HEVCContext *s, int x0, int y0,
1307 int xBase, int yBase, int cb_xBase, int cb_yBase,
1308 int log2_cb_size, int log2_trafo_size,
1309 int trafo_depth, int blk_idx)
1311 HEVCLocalContext *lc = &s->HEVClc;
1312 uint8_t split_transform_flag;
/* 4x4 nodes inherit the chroma CBFs from their parent (chroma is not
 * split below 8x8); otherwise the CBFs start out cleared. */
1314 if (trafo_depth > 0 && log2_trafo_size == 2) {
1315 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1316 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1317 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1318 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
1320 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1321 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
/* With NxN intra split, each quadrant carries its own luma mode. */
1324 if (lc->cu.intra_split_flag) {
1325 if (trafo_depth == 1)
1326 lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1328 lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1331 lc->tt.cbf_luma = 1;
1333 lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1334 lc->cu.pred_mode == MODE_INTER &&
1335 lc->cu.part_mode != PART_2Nx2N &&
/* split_transform_flag is coded only when the size/depth allows a
 * choice; otherwise it is inferred from the constraints below. */
1338 if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1339 log2_trafo_size > s->sps->log2_min_tb_size &&
1340 trafo_depth < lc->cu.max_trafo_depth &&
1341 !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1342 split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1344 split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1345 (lc->cu.intra_split_flag && trafo_depth == 0) ||
1346 lc->tt.inter_split_flag;
/* Chroma CBFs: coded at this level only if set (or root) at the
 * parent level; skipped entirely for 4x4 (inherited above). */
1349 if (log2_trafo_size > 2) {
1350 if (trafo_depth == 0 ||
1351 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1352 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1353 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1356 if (trafo_depth == 0 ||
1357 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1358 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1359 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
/* Recurse into the four half-size children (z-scan order). */
1363 if (split_transform_flag) {
1364 int x1 = x0 + ((1 << log2_trafo_size) >> 1);
1365 int y1 = y0 + ((1 << log2_trafo_size) >> 1);
1367 hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1368 log2_trafo_size - 1, trafo_depth + 1, 0);
1369 hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1370 log2_trafo_size - 1, trafo_depth + 1, 1);
1371 hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1372 log2_trafo_size - 1, trafo_depth + 1, 2);
1373 hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1374 log2_trafo_size - 1, trafo_depth + 1, 3);
1376 int min_tu_size = 1 << s->sps->log2_min_tb_size;
1377 int log2_min_tu_size = s->sps->log2_min_tb_size;
1378 int min_tu_width = s->sps->min_tb_width;
/* Leaf: luma CBF is coded unless it is inferred to be 1 (inter root
 * node with no chroma residual would otherwise be empty). */
1380 if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1381 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1382 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1383 lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1386 hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1387 log2_cb_size, log2_trafo_size, trafo_depth, blk_idx);
1389 // TODO: store cbf_luma somewhere else
/* Propagate cbf_luma into the per-min-TU map used by deblocking. */
1390 if (lc->tt.cbf_luma) {
1392 for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1393 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1394 int x_tu = (x0 + j) >> log2_min_tu_size;
1395 int y_tu = (y0 + i) >> log2_min_tu_size;
1396 s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1399 if (!s->sh.disable_deblocking_filter_flag) {
1400 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
1401 lc->slice_or_tiles_up_boundary,
1402 lc->slice_or_tiles_left_boundary);
1403 if (s->pps->transquant_bypass_enable_flag &&
1404 lc->cu.cu_transquant_bypass_flag)
1405 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
/*
 * Read the raw PCM samples of a pcm_flag coding block straight from the
 * bitstream and write them into the three frame planes.
 * The length computation (full-res luma plus half as many chroma
 * samples) matches the 4:2:0-only limitation noted in the TODO below.
 * Returns 0 on success or a negative error code (error path elided in
 * this view).
 */
1410 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1412 //TODO: non-4:2:0 support
1413 HEVCLocalContext *lc = &s->HEVClc;
1415 int cb_size = 1 << log2_cb_size;
1416 int stride0 = s->frame->linesize[0];
1417 uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1418 int stride1 = s->frame->linesize[1];
1419 uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1420 int stride2 = s->frame->linesize[2];
1421 uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
/* Total payload in bits; skip_bytes() advances the CABAC reader past it. */
1423 int length = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1424 const uint8_t *pcm = skip_bytes(&s->HEVClc.cc, (length + 7) >> 3);
1427 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1428 lc->slice_or_tiles_up_boundary,
1429 lc->slice_or_tiles_left_boundary);
1431 ret = init_get_bits(&gb, pcm, length);
/* Chroma blocks are half-size in each dimension (4:2:0). */
1435 s->hevcdsp.put_pcm(dst0, stride0, cb_size, &gb, s->sps->pcm.bit_depth);
1436 s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1437 s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
/*
 * Parse one motion vector difference into lc->pu.mvd.
 * For each component, x/y accumulate the greater0 and greater1 flags
 * (0, 1 or 2), selecting in the switches below how much more to read:
 * 2 -> full abs-minus-2 + sign, 1 -> sign only (abs == 1), 0 -> zero.
 */
1441 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1443 HEVCLocalContext *lc = &s->HEVClc;
1444 int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1445 int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1448 x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1450 y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1453 case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s); break;
1454 case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1455 case 0: lc->pu.mvd.x = 0; break;
1459 case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s); break;
1460 case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1461 case 0: lc->pu.mvd.y = 0; break;
1466 * 8.5.3.2.2.1 Luma sample interpolation process
1468 * @param s HEVC decoding context
1469 * @param dst target buffer for block data at block position
1470 * @param dststride stride of the dst buffer
1471 * @param ref reference picture buffer at origin (0, 0)
1472 * @param mv motion vector (relative to block position) to get pixel data from
1473 * @param x_off horizontal position of block from origin (0, 0)
1474 * @param y_off vertical position of block from origin (0, 0)
1475 * @param block_w width of block
1476 * @param block_h height of block
1478 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1479 AVFrame *ref, const Mv *mv, int x_off, int y_off,
1480 int block_w, int block_h)
1482 HEVCLocalContext *lc = &s->HEVClc;
1483 uint8_t *src = ref->data[0];
1484 ptrdiff_t srcstride = ref->linesize[0];
1485 int pic_width = s->sps->width;
1486 int pic_height = s->sps->height;
/* mx/my (fractional quarter-pel phases, declarations elided in this
 * view) select the filter and its extra-pixel requirements. */
1490 int extra_left = ff_hevc_qpel_extra_before[mx];
1491 int extra_top = ff_hevc_qpel_extra_before[my];
/* Integer part of the MV is in units of 1/4 pel. */
1493 x_off += mv->x >> 2;
1494 y_off += mv->y >> 2;
1495 src += y_off * srcstride + (x_off << s->sps->pixel_shift);
/* If the filter footprint reaches outside the picture, copy through the
 * edge-emulation buffer, which replicates border pixels. */
1497 if (x_off < extra_left || y_off < extra_top ||
1498 x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1499 y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1500 int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1502 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1503 srcstride, srcstride,
1504 block_w + ff_hevc_qpel_extra[mx],
1505 block_h + ff_hevc_qpel_extra[my],
1506 x_off - extra_left, y_off - extra_top,
1507 pic_width, pic_height);
1508 src = lc->edge_emu_buffer + offset;
1510 s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1511 block_h, lc->mc_buffer);
1515 * 8.5.3.2.2.2 Chroma sample interpolation process
1517 * @param s HEVC decoding context
1518 * @param dst1 target buffer for block data at block position (U plane)
1519 * @param dst2 target buffer for block data at block position (V plane)
1520 * @param dststride stride of the dst1 and dst2 buffers
1521 * @param ref reference picture buffer at origin (0, 0)
1522 * @param mv motion vector (relative to block position) to get pixel data from
1523 * @param x_off horizontal position of block from origin (0, 0)
1524 * @param y_off vertical position of block from origin (0, 0)
1525 * @param block_w width of block
1526 * @param block_h height of block
1528 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1529 ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1530 int x_off, int y_off, int block_w, int block_h)
1532 HEVCLocalContext *lc = &s->HEVClc;
1533 uint8_t *src1 = ref->data[1];
1534 uint8_t *src2 = ref->data[2];
1535 ptrdiff_t src1stride = ref->linesize[1];
1536 ptrdiff_t src2stride = ref->linesize[2];
/* Chroma plane dimensions; >> 1 assumes 4:2:0 subsampling. */
1537 int pic_width = s->sps->width >> 1;
1538 int pic_height = s->sps->height >> 1;
/* Chroma MVs have 1/8-pel precision; integer part is mv >> 3. */
1543 x_off += mv->x >> 3;
1544 y_off += mv->y >> 3;
1545 src1 += y_off * src1stride + (x_off << s->sps->pixel_shift);
1546 src2 += y_off * src2stride + (x_off << s->sps->pixel_shift);
/* Edge emulation when the epel filter footprint leaves the picture.
 * NOTE(review): the second condition compares y_off against
 * EPEL_EXTRA_AFTER while x_off uses EPEL_EXTRA_BEFORE — looks
 * asymmetric; confirm against the EPEL_EXTRA_* definitions. */
1548 if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1549 x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1550 y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1551 int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1552 int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
/* Both chroma planes share lc->edge_emu_buffer, so each plane is
 * emulated and filtered before the buffer is reused for the next. */
1554 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1555 src1stride, src1stride,
1556 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1557 x_off - EPEL_EXTRA_BEFORE,
1558 y_off - EPEL_EXTRA_BEFORE,
1559 pic_width, pic_height);
1561 src1 = lc->edge_emu_buffer + offset1;
1562 s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1563 block_w, block_h, mx, my, lc->mc_buffer);
1565 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1566 src2stride, src2stride,
1567 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1568 x_off - EPEL_EXTRA_BEFORE,
1569 y_off - EPEL_EXTRA_BEFORE,
1570 pic_width, pic_height);
1571 src2 = lc->edge_emu_buffer + offset2;
1572 s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1573 block_w, block_h, mx, my,
/* Fast path: the whole footprint is inside the picture. */
1576 s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1577 block_w, block_h, mx, my,
1579 s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1580 block_w, block_h, mx, my,
/*
 * Frame-threading: block until the reference frame has decoded all rows
 * the motion-compensated read can touch. The +9 margin presumably covers
 * the interpolation filter extent below the block — TODO confirm against
 * the qpel filter taps.
 */
1585 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1586 const Mv *mv, int y0, int height)
1588 int y = (mv->y >> 2) + y0 + height + 9;
1589 ff_thread_await_progress(&ref->tf, y, 0);
/*
 * Decode one prediction unit (PU): parse its motion information (skip /
 * merge / explicit AMVP), store the resulting MvField into the motion
 * vector map, wait for the referenced rows of the reference frame(s)
 * under frame threading, then run uni- or bi-directional motion
 * compensation with optional weighted prediction.
 *
 * Fix applied in this revision: every occurrence of the mojibake
 * "¤t_mv" (an HTML-entity corruption of "&current_mv", where
 * "&curren" was rendered as "¤") is restored to "&current_mv";
 * nothing else is changed.
 */
1592 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1594 int log2_cb_size, int partIdx)
/* POS(): pointer to the top-left sample of this PU in plane c_idx. */
1596 #define POS(c_idx, x, y) \
1597 &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1598 (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1599 HEVCLocalContext *lc = &s->HEVClc;
1601 struct MvField current_mv = {{{ 0 }}};
1603 int min_pu_width = s->sps->min_pu_width;
1605 MvField *tab_mvf = s->ref->tab_mvf;
1606 RefPicList *refPicList = s->ref->refPicList;
1607 HEVCFrame *ref0, *ref1;
1609 int tmpstride = MAX_PB_SIZE;
1611 uint8_t *dst0 = POS(0, x0, y0);
1612 uint8_t *dst1 = POS(1, x0, y0);
1613 uint8_t *dst2 = POS(2, x0, y0);
1614 int log2_min_cb_size = s->sps->log2_min_cb_size;
1615 int min_cb_width = s->sps->min_cb_width;
1616 int x_cb = x0 >> log2_min_cb_size;
1617 int y_cb = y0 >> log2_min_cb_size;
/* Skip mode: only a merge index is coded; MV comes from the merge list. */
1623 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1624 if (s->sh.max_num_merge_cand > 1)
1625 merge_idx = ff_hevc_merge_idx_decode(s);
1629 ff_hevc_luma_mv_merge_mode(s, x0, y0,
1632 log2_cb_size, partIdx,
1633 merge_idx, &current_mv);
1634 x_pu = x0 >> s->sps->log2_min_pu_size;
1635 y_pu = y0 >> s->sps->log2_min_pu_size;
/* Record the motion field for every min-PU cell this PU covers. */
1637 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1638 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1639 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1640 } else { /* MODE_INTER */
1641 lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1642 if (lc->pu.merge_flag) {
1643 if (s->sh.max_num_merge_cand > 1)
1644 merge_idx = ff_hevc_merge_idx_decode(s);
1648 ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1649 partIdx, merge_idx, &current_mv);
1650 x_pu = x0 >> s->sps->log2_min_pu_size;
1651 y_pu = y0 >> s->sps->log2_min_pu_size;
1653 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1654 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1655 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
/* Explicit inter: decode direction, ref indices, MVDs and MVP flags. */
1657 enum InterPredIdc inter_pred_idc = PRED_L0;
1658 ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1659 if (s->sh.slice_type == B_SLICE)
1660 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
/* List 0 motion data (uni-L0 or BI). */
1662 if (inter_pred_idc != PRED_L1) {
1663 if (s->sh.nb_refs[L0]) {
1664 ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1665 current_mv.ref_idx[0] = ref_idx[0];
1667 current_mv.pred_flag[0] = 1;
1668 hls_mvd_coding(s, x0, y0, 0);
1669 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1670 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1671 partIdx, merge_idx, &current_mv,
1673 current_mv.mv[0].x += lc->pu.mvd.x;
1674 current_mv.mv[0].y += lc->pu.mvd.y;
/* List 1 motion data (uni-L1 or BI). */
1677 if (inter_pred_idc != PRED_L0) {
1678 if (s->sh.nb_refs[L1]) {
1679 ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1680 current_mv.ref_idx[1] = ref_idx[1];
/* mvd_l1_zero_flag: the L1 MVD is not coded for BI PUs. */
1683 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1687 hls_mvd_coding(s, x0, y0, 1);
1690 current_mv.pred_flag[1] = 1;
1691 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1692 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1693 partIdx, merge_idx, &current_mv,
1695 current_mv.mv[1].x += lc->pu.mvd.x;
1696 current_mv.mv[1].y += lc->pu.mvd.y;
1699 x_pu = x0 >> s->sps->log2_min_pu_size;
1700 y_pu = y0 >> s->sps->log2_min_pu_size;
1702 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1703 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1704 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
/* Frame threading: wait until the referenced rows are decoded. */
1708 if (current_mv.pred_flag[0]) {
1709 ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1712 hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1714 if (current_mv.pred_flag[1]) {
1715 ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1718 hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
/* Uni-prediction from list 0. */
1721 if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1722 DECLARE_ALIGNED(16, int16_t, tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1723 DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1725 luma_mc(s, tmp, tmpstride, ref0->frame,
1726 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1728 if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1729 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1730 s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1731 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1732 s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1733 dst0, s->frame->linesize[0], tmp,
1734 tmpstride, nPbW, nPbH);
1736 s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1738 chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1739 &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1741 if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1742 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1743 s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1744 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1745 s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1746 dst1, s->frame->linesize[1], tmp, tmpstride,
1747 nPbW / 2, nPbH / 2);
1748 s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1749 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1750 s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1751 dst2, s->frame->linesize[2], tmp2, tmpstride,
1752 nPbW / 2, nPbH / 2);
1754 s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1755 s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
/* Uni-prediction from list 1. */
1757 } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1758 DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1759 DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1764 luma_mc(s, tmp, tmpstride, ref1->frame,
1765 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1767 if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1768 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1769 s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1770 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1771 s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1772 dst0, s->frame->linesize[0], tmp, tmpstride,
1775 s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1778 chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1779 &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1781 if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1782 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1783 s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1784 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1785 s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1786 dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1787 s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1788 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1789 s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1790 dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1792 s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1793 s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
/* Bi-prediction: fetch both references, then average (weighted or not). */
1795 } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1796 DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1797 DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1798 DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1799 DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1800 HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1801 HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1806 luma_mc(s, tmp, tmpstride, ref0->frame,
1807 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1808 luma_mc(s, tmp2, tmpstride, ref1->frame,
1809 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1811 if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1812 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1813 s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1814 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1815 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1816 s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1817 s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1818 dst0, s->frame->linesize[0],
1819 tmp, tmp2, tmpstride, nPbW, nPbH);
1821 s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1822 tmp, tmp2, tmpstride, nPbW, nPbH);
1825 chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1826 &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1827 chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1828 &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1830 if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1831 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1832 s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1833 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1834 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1835 s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1836 s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1837 dst1, s->frame->linesize[1], tmp, tmp3,
1838 tmpstride, nPbW / 2, nPbH / 2);
1839 s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1840 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1841 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1842 s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1843 s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1844 dst2, s->frame->linesize[2], tmp2, tmp4,
1845 tmpstride, nPbW / 2, nPbH / 2);
1847 s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1848 s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
/*
 * Derive the luma intra prediction mode of the PU at (x0, y0) from the
 * left and up neighbour modes (the three "most probable modes"), decode
 * either an MPM index or a remainder, and write the resulting mode and
 * an intra MvField into the per-min-PU maps.
 * Returns the derived intra prediction mode.
 */
1856 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1857 int prev_intra_luma_pred_flag)
1859 HEVCLocalContext *lc = &s->HEVClc;
1860 int x_pu = x0 >> s->sps->log2_min_pu_size;
1861 int y_pu = y0 >> s->sps->log2_min_pu_size;
1862 int min_pu_width = s->sps->min_pu_width;
1863 int size_in_pus = pu_size >> s->sps->log2_min_pu_size;
1864 int x0b = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1865 int y0b = y0 & ((1 << s->sps->log2_ctb_size) - 1);
/* Neighbours outside the current CTB / slice default to INTRA_DC. */
1867 int cand_up = (lc->ctb_up_flag || y0b) ?
1868 s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1869 int cand_left = (lc->ctb_left_flag || x0b) ?
1870 s->tab_ipm[y_pu * min_pu_width + x_pu - 1] : INTRA_DC;
1872 int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1874 MvField *tab_mvf = s->ref->tab_mvf;
1875 int intra_pred_mode;
1879 // intra_pred_mode prediction does not cross vertical CTB boundaries
1880 if ((y0 - 1) < y_ctb)
/* Build the 3-entry MPM candidate list. */
1883 if (cand_left == cand_up) {
1884 if (cand_left < 2) {
/* Both neighbours non-angular: use the fixed default list. */
1885 candidate[0] = INTRA_PLANAR;
1886 candidate[1] = INTRA_DC;
1887 candidate[2] = INTRA_ANGULAR_26;
/* Both angular and equal: neighbour mode plus its two adjacent
 * angular modes (wrapping within the 32 angular modes). */
1889 candidate[0] = cand_left;
1890 candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1891 candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
/* Distinct neighbours: both of them plus the first unused of
 * PLANAR / DC / ANGULAR_26. */
1894 candidate[0] = cand_left;
1895 candidate[1] = cand_up;
1896 if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1897 candidate[2] = INTRA_PLANAR;
1898 } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1899 candidate[2] = INTRA_DC;
1901 candidate[2] = INTRA_ANGULAR_26;
1905 if (prev_intra_luma_pred_flag) {
1906 intra_pred_mode = candidate[lc->pu.mpm_idx];
/* rem_intra_luma_pred_mode indexes the non-MPM modes: sort the
 * candidates, then bump the remainder past each candidate <= it. */
1908 if (candidate[0] > candidate[1])
1909 FFSWAP(uint8_t, candidate[0], candidate[1]);
1910 if (candidate[0] > candidate[2])
1911 FFSWAP(uint8_t, candidate[0], candidate[2]);
1912 if (candidate[1] > candidate[2])
1913 FFSWAP(uint8_t, candidate[1], candidate[2]);
1915 intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1916 for (i = 0; i < 3; i++)
1917 if (intra_pred_mode >= candidate[i])
1921 /* write the intra prediction units into the mv array */
1924 for (i = 0; i < size_in_pus; i++) {
1925 memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1926 intra_pred_mode, size_in_pus);
/* Intra PUs get a neutral motion field (no prediction, zero MVs). */
1928 for (j = 0; j < size_in_pus; j++) {
1929 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra = 1;
1930 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1931 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1932 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0] = 0;
1933 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1] = 0;
1934 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x = 0;
1935 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y = 0;
1936 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x = 0;
1937 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y = 0;
1941 return intra_pred_mode;
/*
 * Record the coding-tree depth of the CB at (x0, y0) in tab_ct_depth for
 * every min-CB cell the block covers (one memset per row).
 */
1944 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1945 int log2_cb_size, int ct_depth)
1947 int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1948 int x_cb = x0 >> s->sps->log2_min_cb_size;
1949 int y_cb = y0 >> s->sps->log2_min_cb_size;
1952 for (y = 0; y < length; y++)
1953 memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
/*
 * Parse the intra prediction info of a CU: one luma mode for PART_2Nx2N
 * or four (2x2 grid of half-size PBs) for PART_NxN, then a single
 * chroma mode derived from the first luma mode.
 */
1957 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1960 HEVCLocalContext *lc = &s->HEVClc;
/* Candidate chroma modes for intra_chroma_pred_mode 0..3. */
1961 static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1962 uint8_t prev_intra_luma_pred_flag[4];
/* split: 1 for PART_NxN (four half-size PBs), else a single PB. */
1963 int split = lc->cu.part_mode == PART_NxN;
1964 int pb_size = (1 << log2_cb_size) >> split;
1965 int side = split + 1;
/* All prev_intra_luma_pred_flags are coded before the per-PB data. */
1969 for (i = 0; i < side; i++)
1970 for (j = 0; j < side; j++)
1971 prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1973 for (i = 0; i < side; i++) {
1974 for (j = 0; j < side; j++) {
1975 if (prev_intra_luma_pred_flag[2 * i + j])
1976 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1978 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1980 lc->pu.intra_pred_mode[2 * i + j] =
1981 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1982 prev_intra_luma_pred_flag[2 * i + j]);
/* Chroma: mode 4 means "same as luma"; if the table entry collides
 * with the luma mode, ANGULAR_34 is substituted. */
1986 chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1987 if (chroma_mode != 4) {
1988 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1989 lc->pu.intra_pred_mode_c = 34;
1991 lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
1993 lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
/*
 * Fill the intra-mode and motion maps with default values for a CU that
 * codes no explicit intra modes (skip/inter or PCM): mode INTRA_DC for
 * every covered min-PU cell, and is_intra mirroring the CU pred_mode.
 */
1997 static void intra_prediction_unit_default_value(HEVCContext *s,
2001 HEVCLocalContext *lc = &s->HEVClc;
2002 int pb_size = 1 << log2_cb_size;
2003 int size_in_pus = pb_size >> s->sps->log2_min_pu_size;
2004 int min_pu_width = s->sps->min_pu_width;
2005 MvField *tab_mvf = s->ref->tab_mvf;
2006 int x_pu = x0 >> s->sps->log2_min_pu_size;
2007 int y_pu = y0 >> s->sps->log2_min_pu_size;
/* CB smaller than a min PU: nothing to record. */
2010 if (size_in_pus == 0)
2012 for (j = 0; j < size_in_pus; j++) {
2013 memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2014 for (k = 0; k < size_in_pus; k++)
2015 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
/* Decode one coding unit (CU) at luma position (x0, y0) with size
 * 1 << log2_cb_size. Reads the CU-level syntax (transquant bypass, skip
 * flag, prediction mode, partition mode, optional PCM), decodes the
 * prediction units for the chosen partitioning and then the residual
 * transform tree, and records per-CU state (skip flags, QP map, coding
 * tree depth) in the frame-wide tables.
 * NOTE(review): this extract is elided (brace/case/break lines are not
 * visible); comments below describe only the visible statements. */
2019 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2021     int cb_size = 1 << log2_cb_size;
2022     HEVCLocalContext *lc = &s->HEVClc;
2023     int log2_min_cb_size = s->sps->log2_min_cb_size;
/* length = CU size expressed in minimum-CB units; used as both row count
 * and memset length when filling the per-min-CB tables below. */
2024     int length = cb_size >> log2_min_cb_size;
2025     int min_cb_width = s->sps->min_cb_width;
2026     int x_cb = x0 >> log2_min_cb_size;
2027     int y_cb = y0 >> log2_min_cb_size;
/* Reset per-CU state to its defaults before parsing any syntax. */
2032     lc->cu.rqt_root_cbf = 1;
2033     lc->cu.pred_mode = MODE_INTRA;
2034     lc->cu.part_mode = PART_2Nx2N;
2035     lc->cu.intra_split_flag = 0;
2036     lc->cu.pcm_flag = 0;
2038     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2039     for (x = 0; x < 4; x++)
2040         lc->pu.intra_pred_mode[x] = 1;
2041     if (s->pps->transquant_bypass_enable_flag) {
2042         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2043         if (lc->cu.cu_transquant_bypass_flag)
2044             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2046         lc->cu.cu_transquant_bypass_flag = 0;
/* Non-I slices carry a skip flag; propagate it into the per-min-CB
 * skip_flag table for the whole CU footprint. */
2048     if (s->sh.slice_type != I_SLICE) {
2049         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2051         lc->cu.pred_mode = MODE_SKIP;
2052         x = y_cb * min_cb_width + x_cb;
2053         for (y = 0; y < length; y++) {
2054             memset(&s->skip_flag[x], skip_flag, length);
2057         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
/* Skipped CU: a single 2Nx2N merge PU, no residual syntax. */
2060     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2061         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2062         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2064         if (!s->sh.disable_deblocking_filter_flag)
2065             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2066                                                   lc->slice_or_tiles_up_boundary,
2067                                                   lc->slice_or_tiles_left_boundary);
2069         if (s->sh.slice_type != I_SLICE)
2070             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
/* part_mode is only coded for inter CUs or for CUs at the minimum size;
 * intra_split_flag marks NxN intra partitioning (four intra PUs). */
2071         if (lc->cu.pred_mode != MODE_INTRA ||
2072             log2_cb_size == s->sps->log2_min_cb_size) {
2073             lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size);
2074             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2075                                       lc->cu.pred_mode == MODE_INTRA;
2078         if (lc->cu.pred_mode == MODE_INTRA) {
/* PCM is only possible for 2Nx2N intra CUs within the SPS-signalled
 * PCM size range. */
2079             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2080                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2081                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2082                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2084             if (lc->cu.pcm_flag) {
2086                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2087                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2088                 if (s->sps->pcm.loop_filter_disable_flag)
2089                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2094                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2097             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
/* Inter CU: dispatch one hls_prediction_unit() call per PU, with the
 * geometry implied by part_mode (2Nx2N, 2NxN, Nx2N, the four AMP
 * shapes, and NxN). Case labels are elided from this view. */
2098             switch (lc->cu.part_mode) {
2100                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2103                 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0);
2104                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2107                 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0);
2108                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2111                 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0);
2112                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2115                 hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2116                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1);
2119                 hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0);
2120                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2123                 hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2124                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1);
2127                 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0);
2128                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1);
2129                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2130                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
/* Residual: rqt_root_cbf is coded for inter non-(2Nx2N merge) CUs;
 * when set, descend into the transform tree, otherwise only the
 * deblocking boundary strengths are updated. */
2135         if (!lc->cu.pcm_flag) {
2136             if (lc->cu.pred_mode != MODE_INTRA &&
2137                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2138                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2140             if (lc->cu.rqt_root_cbf) {
2141                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2142                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2143                                          s->sps->max_transform_hierarchy_depth_inter;
2144                 hls_transform_tree(s, x0, y0, x0, y0, x0, y0, log2_cb_size,
2145                                    log2_cb_size, 0, 0);
2147                 if (!s->sh.disable_deblocking_filter_flag)
2148                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2149                                                           lc->slice_or_tiles_up_boundary,
2150                                                           lc->slice_or_tiles_left_boundary);
/* If no cu_qp_delta was coded for this quantization group, derive the
 * predicted QP now, then fill the QP map for the CU footprint. */
2155     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2156         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2158     x = y_cb * min_cb_width + x_cb;
2159     for (y = 0; y < length; y++) {
2160         memset(&s->qp_y_tab[x], lc->qp_y, length);
2164     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
/* Recursively parse the coding quadtree rooted at (x0, y0) with size
 * 1 << log2_cb_size at depth cb_depth. Reads (or infers, at the picture
 * border) split_cu_flag; on a split, recurses into the four half-size
 * sub-blocks via SUBDIVIDE, otherwise decodes a single coding unit.
 * NOTE(review): elided view — the recursion's closing logic and the
 * SUBDIVIDE invocations are not all visible here. */
2169 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2170                                int log2_cb_size, int cb_depth)
2172     HEVCLocalContext *lc = &s->HEVClc;
2173     const int cb_size = 1 << log2_cb_size;
2175     lc->ct.depth = cb_depth;
/* split_cu_flag is only coded when the block lies fully inside the
 * picture and is larger than the minimum CB size... */
2176     if (x0 + cb_size <= s->sps->width &&
2177         y0 + cb_size <= s->sps->height &&
2178         log2_cb_size > s->sps->log2_min_cb_size) {
2179         SAMPLE(s->split_cu_flag, x0, y0) =
2180             ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
/* ...otherwise a split is forced for any block still above minimum size
 * (e.g. blocks crossing the right/bottom picture border). */
2182         SAMPLE(s->split_cu_flag, x0, y0) =
2183             (log2_cb_size > s->sps->log2_min_cb_size);
/* Entering a new quantization group resets the cu_qp_delta state. */
2185     if (s->pps->cu_qp_delta_enabled_flag &&
2186         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2187         lc->tu.is_cu_qp_delta_coded = 0;
2188         lc->tu.cu_qp_delta = 0;
2191     if (SAMPLE(s->split_cu_flag, x0, y0)) {
2192         const int cb_size_split = cb_size >> 1;
2193         const int x1 = x0 + cb_size_split;
2194         const int y1 = y0 + cb_size_split;
/* Recurse into a quadrant only if its top-left corner is inside the
 * picture; quadrants entirely outside are simply not coded. */
2199 #define SUBDIVIDE(x, y)                                                \
2201     if (x < s->sps->width && y < s->sps->height) {                     \
2202         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
/* Leaf of the quadtree: decode the coding unit itself. */
2213         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
/* Compute, for the CTB at (x_ctb, y_ctb), the availability of its
 * left/up/up-left/up-right neighbours across slice and tile boundaries,
 * and the x/y extents of the current tile. The results are stored in
 * the local context and later consulted by CABAC context derivation and
 * the in-loop filters. */
2221 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2224     HEVCLocalContext *lc = &s->HEVClc;
2225     int ctb_size = 1 << s->sps->log2_ctb_size;
2226     int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2227     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2229     int tile_left_boundary, tile_up_boundary;
2230     int slice_left_boundary, slice_up_boundary;
/* Remember which slice owns this CTB; used below (and by neighbouring
 * CTBs) to decide whether a neighbour belongs to the same slice. */
2232     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2234     if (s->pps->entropy_coding_sync_enabled_flag) {
/* WPP: each CTB row starts a new QP group at x == 0. */
2235         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2236             lc->first_qp_group = 1;
2237         lc->end_of_tiles_x = s->sps->width;
2238     } else if (s->pps->tiles_enabled_flag) {
/* Crossing into a new tile: recompute the tile's horizontal extent
 * from the PPS column widths. */
2239         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2240             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2241             lc->start_of_tiles_x = x_ctb;
2242             lc->end_of_tiles_x = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2243             lc->first_qp_group = 1;
2246         lc->end_of_tiles_x = s->sps->width;
2249     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
/* A neighbour is "available across the boundary" when it exists and
 * shares this CTB's tile (tile_*) or slice (slice_*). */
2251     if (s->pps->tiles_enabled_flag) {
2252         tile_left_boundary = x_ctb > 0 &&
2253                              s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
2254         slice_left_boundary = x_ctb > 0 &&
2255                               s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
2256         tile_up_boundary = y_ctb > 0 &&
2257                            s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2258         slice_up_boundary = y_ctb > 0 &&
2259                             s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
/* No tiles: only slice membership can make a neighbour unavailable. */
2261         tile_left_boundary =
2262         tile_up_boundary = 1;
2263         slice_left_boundary = ctb_addr_in_slice > 0;
2264         slice_up_boundary = ctb_addr_in_slice >= s->sps->ctb_width;
/* Pack the two boundary kinds into bitmasks: bit 0 = slice boundary,
 * bit 1 = tile boundary (set when the neighbour is NOT same-slice/tile). */
2266     lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
2267     lc->slice_or_tiles_up_boundary = (!slice_up_boundary + (!tile_up_boundary << 1));
2268     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
2269     lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
2270     lc->ctb_up_right_flag = ((y_ctb > 0) && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2271     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
/* Decode the CTBs of the current slice in tile-scan order: for each CTB
 * set up neighbour availability, (re)initialize CABAC, parse SAO
 * parameters and the coding quadtree, then run the in-loop filters as
 * rows complete. Returns the number of decoded CTBs via ctb_addr_ts
 * (callers compare it against the picture's CTB count), or a negative
 * error from the quadtree parser.
 * NOTE(review): loop increment / early-exit lines are elided here. */
2274 static int hls_slice_data(HEVCContext *s)
2276     int ctb_size = 1 << s->sps->log2_ctb_size;
2280     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2283     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2284         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* Raster-scan address -> pixel coordinates of the CTB's top-left. */
2286         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2287         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2288         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2290         ff_hevc_cabac_init(s, ctb_addr_ts);
2292         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
/* Per-CTB deblocking parameters come from the slice header. */
2294         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2295         s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset;
2296         s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag;
2298         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2301         more_data = !ff_hevc_end_of_slice_flag_decode(s);
/* Save CABAC state for WPP/tiles and filter the CTBs that are complete. */
2304         ff_hevc_save_states(s, ctb_addr_ts);
2305         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* Last CTB of the picture: flush the filters over the remaining area. */
2308     if (x_ctb + ctb_size >= s->sps->width &&
2309         y_ctb + ctb_size >= s->sps->height)
2310         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2316  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2317  * 0 if the unit should be skipped, 1 otherwise
/* Parse the 2-byte HEVC NAL unit header: forbidden_zero_bit,
 * nal_unit_type (6 bits), nuh_layer_id (6 bits) and nuh_temporal_id_plus1
 * (3 bits). Only base-layer units (nuh_layer_id == 0) are decoded. */
2319 static int hls_nal_unit(HEVCContext *s)
2321     GetBitContext *gb = &s->HEVClc.gb;
/* forbidden_zero_bit must be 0 in a conforming stream. */
2324     if (get_bits1(gb) != 0)
2325         return AVERROR_INVALIDDATA;
2327     s->nal_unit_type = get_bits(gb, 6);
2329     nuh_layer_id = get_bits(gb, 6);
/* temporal_id is coded as TemporalId + 1, so 0 in the bitstream
 * (yielding -1 here) is invalid. */
2330     s->temporal_id = get_bits(gb, 3) - 1;
2331     if (s->temporal_id < 0)
2332         return AVERROR_INVALIDDATA;
2334     av_log(s->avctx, AV_LOG_DEBUG,
/* NOTE(review): this format string is missing a separator between the
 * nuh_layer_id value and "temporal_id", so the log line runs the two
 * together; fixing the string is a behavior change, left untouched. */
2335            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2336            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2338     return nuh_layer_id == 0;
/* For every minimum-PU block marked in s->is_pcm, copy the unfiltered
 * pixels from s->frame back into s->sao_frame for all three planes, so
 * that SAO output keeps the original samples where the loop filter is
 * bypassed (PCM with loop filter disabled, or transquant bypass). */
2341 static void restore_tqb_pixels(HEVCContext *s)
2343     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2346     for (c_idx = 0; c_idx < 3; c_idx++) {
2347         ptrdiff_t stride = s->frame->linesize[c_idx];
/* Chroma planes are subsampled; hshift/vshift scale PU coordinates
 * and sizes into plane coordinates. */
2348         int hshift = s->sps->hshift[c_idx];
2349         int vshift = s->sps->vshift[c_idx];
2350         for (y = 0; y < s->sps->min_pu_height; y++) {
2351             for (x = 0; x < s->sps->min_pu_width; x++) {
2352                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
/* len is in bytes: pixel count adjusted for bit depth via pixel_shift. */
2354                     int len = min_pu_size >> hshift;
2355                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2356                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2357                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2358                         memcpy(dst, src, len);
/* Translate the SEI frame-packing-arrangement message (if present and of
 * a supported type) into AVStereo3D side data attached to the output
 * frame. Returns 0 on success or AVERROR(ENOMEM). */
2368 static int set_side_data(HEVCContext *s)
2370     AVFrame *out = s->ref->frame;
/* Types 3..5 (side-by-side, top-bottom, frame sequence) are the only
 * packing arrangements mapped here. */
2372     if (s->sei_frame_packing_present &&
2373         s->frame_packing_arrangement_type >= 3 &&
2374         s->frame_packing_arrangement_type <= 5 &&
2375         s->content_interpretation_type > 0 &&
2376         s->content_interpretation_type < 3) {
2377         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2379             return AVERROR(ENOMEM);
2381         switch (s->frame_packing_arrangement_type) {
/* Type 3: side by side, with optional quincunx subsampling. */
2383             if (s->quincunx_subsampling)
2384                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2386                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2389             stereo->type = AV_STEREO3D_TOPBOTTOM;
2392             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
/* content_interpretation_type == 2 means the right view comes first. */
2396         if (s->content_interpretation_type == 2)
2397             stereo->flags = AV_STEREO3D_FLAG_INVERT;
/* Begin decoding a new frame: clear the per-frame filter/bookkeeping
 * tables, acquire a new reference frame from the DPB, size the edge
 * emulation buffer, build the frame's reference picture sets, attach SEI
 * side data and push any frame that became ready for output. On error
 * the partially set up reference is released (cleanup path elided from
 * this view). */
2403 static int hevc_frame_start(HEVCContext *s)
2405     HEVCLocalContext *lc = &s->HEVClc;
/* Boundary-strength and CBF/PCM maps must start zeroed each frame. */
2408     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2409     memset(s->vertical_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2410     memset(s->cbf_luma, 0, s->sps->min_tb_width * s->sps->min_tb_height);
2411     memset(s->is_pcm, 0, s->sps->min_pu_width * s->sps->min_pu_height);
2413     lc->start_of_tiles_x = 0;
2416     if (s->pps->tiles_enabled_flag)
2417         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
/* When SAO is on, decode into sao_frame and keep frame as the pre-SAO
 * copy; otherwise decode directly into frame. */
2419     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2424     av_fast_malloc(&lc->edge_emu_buffer, &lc->edge_emu_buffer_size,
2425                    (MAX_PB_SIZE + 7) * s->ref->frame->linesize[0]);
2426     if (!lc->edge_emu_buffer) {
2427         ret = AVERROR(ENOMEM);
2431     ret = ff_hevc_frame_rps(s);
2433         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2437     ret = set_side_data(s);
2441     av_frame_unref(s->output_frame);
2442     ret = ff_hevc_output_frame(s, s->output_frame, 0);
/* Frame threading: setup is complete, other threads may proceed. */
2446     ff_thread_finish_setup(s->avctx);
/* (error path) mark the failed frame fully "decoded" so waiters on its
 * progress do not block forever. */
2452     ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/* Decode a single (already unescaped) NAL unit: parse its header, then
 * dispatch on nal_unit_type to the VPS/SPS/PPS/SEI parsers or, for slice
 * types, run slice-header parsing, frame start, reference list setup and
 * the slice-data loop. Returns 0/positive on success or a negative
 * AVERROR; many error/skip branches are elided from this view. */
2457 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2459     HEVCLocalContext *lc = &s->HEVClc;
2460     GetBitContext *gb = &lc->gb;
2461     int ctb_addr_ts, ret;
2463     ret = init_get_bits8(gb, nal, length);
2467     ret = hls_nal_unit(s);
2469         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
/* Invalid header is only fatal with AV_EF_EXPLODE; otherwise skip. */
2471         if (s->avctx->err_recognition & AV_EF_EXPLODE)
2477     switch (s->nal_unit_type) {
2479         ret = ff_hevc_decode_nal_vps(s);
2484         ret = ff_hevc_decode_nal_sps(s);
2489         ret = ff_hevc_decode_nal_pps(s);
2493     case NAL_SEI_PREFIX:
2494     case NAL_SEI_SUFFIX:
2495         ret = ff_hevc_decode_nal_sei(s);
/* All slice-carrying NAL types (TRAIL/TSA/STSA/RADL/RASL/BLA/IDR/CRA,
 * most case labels elided) fall through to the shared slice path. */
2506     case NAL_BLA_W_RADL:
2508     case NAL_IDR_W_RADL:
2515         ret = hls_slice_header(s);
/* Random-access handling: after a seek (max_ra == INT_MAX) decoding can
 * only start at a CRA/BLA point; RASL pictures that depend on skipped
 * leading pictures are dropped until poc passes max_ra. */
2519         if (s->max_ra == INT_MAX) {
2520             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2524                 s->max_ra = INT_MIN;
2528         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2529             s->poc <= s->max_ra) {
2533             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2534                 s->max_ra = INT_MIN;
2537         if (s->sh.first_slice_in_pic_flag) {
2538             ret = hevc_frame_start(s);
2541         } else if (!s->ref) {
2542             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2543             return AVERROR_INVALIDDATA;
2546         if (!s->sh.dependent_slice_segment_flag &&
2547             s->sh.slice_type != I_SLICE) {
2548             ret = ff_hevc_slice_rpl(s);
2550                 av_log(s->avctx, AV_LOG_WARNING,
2551                        "Error constructing the reference lists for the current slice.\n");
2552                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
2557         ctb_addr_ts = hls_slice_data(s);
/* Whole picture decoded: optionally restore bypassed pixels for SAO. */
2558         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2560             if ((s->pps->transquant_bypass_enable_flag ||
2561                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2562                 s->sps->sao_enabled)
2563                 restore_tqb_pixels(s);
2566         if (ctb_addr_ts < 0)
/* EOS/EOB: bump the sequence counter and force a new RA point. */
2571         s->seq_decode = (s->seq_decode + 1) & 0xff;
2572         s->max_ra = INT_MAX;
2578         av_log(s->avctx, AV_LOG_INFO,
2579                "Skipping NAL unit %d\n", s->nal_unit_type);
2585 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2586  * between these functions would be nice. */
/* Strip emulation-prevention bytes (00 00 03 -> 00 00) from [src,
 * src+length) into nal->rbsp_buffer, stopping at the next start code.
 * Returns the number of consumed source bytes; nal->data/size (set in
 * elided lines) describe the unescaped payload. The vectorized scans
 * below only locate candidate 00 00 xx positions quickly; STARTCODE_TEST
 * does the exact check. */
2587 static int extract_rbsp(const uint8_t *src, int length,
2593 #define STARTCODE_TEST                                                  \
2594         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2595             if (src[i + 2] != 3) {                                      \
2596                 /* startcode, so we must be past the end */             \
2601 #if HAVE_FAST_UNALIGNED
2602 #define FIND_FIRST_ZERO                                                 \
2603         if (i > 0 && !src[i])                                           \
/* 64-bit scan: the bit trick flags any zero byte inside the word. */
2608     for (i = 0; i + 1 < length; i += 9) {
2609         if (!((~AV_RN64A(src + i) &
2610                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2611                0x8000800080008080ULL))
/* 32-bit variant of the same zero-byte scan. */
2618     for (i = 0; i + 1 < length; i += 5) {
2619         if (!((~AV_RN32A(src + i) &
2620                (AV_RN32A(src + i) - 0x01000101U)) &
2627 #endif /* HAVE_FAST_64BIT */
/* Portable byte-wise fallback. */
2629     for (i = 0; i + 1 < length; i += 2) {
2632         if (i > 0 && src[i - 1] == 0)
2636 #endif /* HAVE_FAST_UNALIGNED */
/* Fast path: no escape sequence found, the payload can be used as is
 * (handled in elided lines). */
2638     if (i >= length - 1) { // no escaped 0
2644     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2645                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2646     if (!nal->rbsp_buffer)
2647         return AVERROR(ENOMEM);
2649     dst = nal->rbsp_buffer;
/* Copy the escape-free prefix verbatim, then filter the remainder. */
2651     memcpy(dst, src, i);
2653     while (si + 2 < length) {
2654         // remove escapes (very rare 1:2^22)
2655         if (src[si + 2] > 3) {
2656             dst[di++] = src[si++];
2657             dst[di++] = src[si++];
2658         } else if (src[si] == 0 && src[si + 1] == 0) {
2659             if (src[si + 2] == 3) { // escape
2665             } else // next start code
2669         dst[di++] = src[si++];
2672         dst[di++] = src[si++];
/* Zero the padding so downstream bit readers never read garbage. */
2675     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
/* Split an input packet into NAL units (either length-prefixed "nalff"
 * units using s->nal_length_size, or Annex-B start-code separated),
 * unescape each into s->nals[], then decode them in order. Returns 0 on
 * success or a negative AVERROR. */
2682 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2684     int i, consumed, ret = 0;
2689     /* split the input packet into NAL units, so we know the upper bound on the
2690      * number of slices in the frame */
2692     while (length >= 4) {
2694         int extract_length = 0;
/* nalff mode: read the big-endian length prefix. */
2698             for (i = 0; i < s->nal_length_size; i++)
2699                 extract_length = (extract_length << 8) | buf[i];
2700             buf += s->nal_length_size;
2701             length -= s->nal_length_size;
2703             if (extract_length > length) {
2704                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2705                 ret = AVERROR_INVALIDDATA;
/* Annex-B mode: each unit must begin with a 00 00 01 start code. */
2714             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2715                 ret = AVERROR_INVALIDDATA;
2721             extract_length = length;
/* Grow the NAL array one slot at a time, zeroing the new entries so
 * their rbsp buffers start unallocated. */
2724         if (s->nals_allocated < s->nb_nals + 1) {
2725             int new_size = s->nals_allocated + 1;
2726             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2728                 ret = AVERROR(ENOMEM);
2732             memset(s->nals + s->nals_allocated, 0,
2733                    (new_size - s->nals_allocated) * sizeof(*tmp));
2734             s->nals_allocated = new_size;
2736         nal = &s->nals[s->nb_nals++];
2738         consumed = extract_rbsp(buf, extract_length, nal);
2744         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
/* EOB/EOS terminate the access unit; stop splitting early. */
2749         if (s->nal_unit_type == NAL_EOB_NUT ||
2750             s->nal_unit_type == NAL_EOS_NUT)
2757     /* parse the NAL units */
2758     for (i = 0; i < s->nb_nals; i++) {
2759         int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2761             av_log(s->avctx, AV_LOG_WARNING,
2762                    "Error parsing NAL unit #%d.\n", i);
2763             if (s->avctx->err_recognition & AV_EF_EXPLODE)
/* (failure path) unblock any thread waiting on this frame's progress. */
2770         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/* Log a 16-byte MD5 digest as 32 lowercase hex digits, no newline. */
2775 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
2778     for (i = 0; i < 16; i++)
2779         av_log(log_ctx, level, "%02"PRIx8, md5[i]);
/* Verify the decoded frame against the per-plane MD5 checksums carried
 * in the picture-hash SEI (stored in s->md5[]). Returns 0 when all
 * planes match, AVERROR_INVALIDDATA on a mismatch, or AVERROR(EINVAL)/
 * AVERROR(ENOMEM) for unusable formats / allocation failure. */
2782 static int verify_md5(HEVCContext *s, AVFrame *frame)
2784     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2789         return AVERROR(EINVAL);
/* pixel_shift: 1 for >8-bit formats (2 bytes per sample), else 0. */
2791     pixel_shift = desc->comp[0].depth_minus1 > 7;
2793     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2796     /* the checksums are LE, so we have to byteswap for >8bpp formats
/* Scratch buffer for byte-swapping one row at a time on big-endian
 * layouts; sized for the widest plane. */
2799     if (pixel_shift && !s->checksum_buf) {
2800         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2801                        FFMAX3(frame->linesize[0], frame->linesize[1],
2802                               frame->linesize[2]));
2803         if (!s->checksum_buf)
2804             return AVERROR(ENOMEM);
2808     for (i = 0; frame->data[i]; i++) {
2809         int width = s->avctx->coded_width;
2810         int height = s->avctx->coded_height;
/* Planes 1 and 2 are chroma: scale dimensions by the subsampling. */
2811         int w = (i == 1 || i == 2) ? (width >> desc->log2_chroma_w) : width;
2812         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2815         av_md5_init(s->md5_ctx);
2816         for (j = 0; j < h; j++) {
2817             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2820                 s->dsp.bswap16_buf((uint16_t*)s->checksum_buf,
2821                                    (const uint16_t*)src, w);
2822                 src = s->checksum_buf;
2825             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2827         av_md5_final(s->md5_ctx, md5);
2829         if (!memcmp(md5, s->md5[i], 16)) {
2830             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2831             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2832             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2834             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2835             print_md5(s->avctx, AV_LOG_ERROR, md5);
2836             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2837             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2838             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2839             return AVERROR_INVALIDDATA;
2843     av_log(s->avctx, AV_LOG_DEBUG, "\n");
/* AVCodec.decode entry point. An empty packet drains buffered output
 * frames; otherwise decode the packet's NAL units, optionally verify the
 * SEI MD5 checksum, and move any ready frame into *data, setting
 * *got_output accordingly. */
2848 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2852     HEVCContext *s = avctx->priv_data;
/* Flush path: empty packet -> output remaining DPB frames. */
2855         ret = ff_hevc_output_frame(s, data, 1);
2864     ret = decode_nal_units(s, avpkt->data, avpkt->size);
2868     /* verify the SEI checksum */
2869     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2871         ret = verify_md5(s, s->ref->frame);
/* On checksum mismatch with AV_EF_EXPLODE, drop the bad frame. */
2872         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2873             ff_hevc_unref_frame(s, s->ref, ~0);
2879     if (s->is_decoded) {
2880         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2884     if (s->output_frame->buf[0]) {
2885         av_frame_move_ref(data, s->output_frame);
/* Make dst a reference to src's frame (used when copying the DPB across
 * frame threads): ref the ThreadFrame and the motion-vector / reference-
 * list buffers, and copy the plain metadata fields. On any allocation
 * failure the partially-built dst is fully unreferenced and
 * AVERROR(ENOMEM) is returned. */
2892 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2894     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2898     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2899     if (!dst->tab_mvf_buf)
2901     dst->tab_mvf = src->tab_mvf;
2903     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2904     if (!dst->rpl_tab_buf)
2906     dst->rpl_tab = src->rpl_tab;
2908     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
/* Non-refcounted metadata is copied by value. */
2912     dst->poc = src->poc;
2913     dst->ctb_count = src->ctb_count;
2914     dst->window = src->window;
2915     dst->flags = src->flags;
2916     dst->sequence = src->sequence;
/* (fail label) release everything acquired so far; ~0 = all planes. */
2920     ff_hevc_unref_frame(s, dst, ~0);
2921     return AVERROR(ENOMEM);
/* AVCodec.close: free every owned resource — scratch buffers, MD5
 * context, temporary/output frames, the whole DPB, all cached parameter
 * sets and the NAL unescaping buffers. Safe to call on a partially
 * initialized context (used as the error path of init too). */
2924 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2926     HEVCContext *s = avctx->priv_data;
2927     HEVCLocalContext *lc = &s->HEVClc;
2932     av_freep(&lc->edge_emu_buffer);
2933     av_freep(&s->md5_ctx);
2935     av_frame_free(&s->tmp_frame);
2936     av_frame_free(&s->output_frame);
/* Unreference DPB contents before freeing the frames themselves. */
2938     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2939         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2940         av_frame_free(&s->DPB[i].frame);
2943     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
2944         av_buffer_unref(&s->vps_list[i]);
2945     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
2946         av_buffer_unref(&s->sps_list[i]);
2947     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
2948         av_buffer_unref(&s->pps_list[i]);
/* Only nals_allocated entries may own an rbsp buffer. */
2950     for (i = 0; i < s->nals_allocated; i++)
2951         av_freep(&s->nals[i].rbsp_buffer);
2953     s->nals_allocated = 0;
/* Allocate the per-context resources shared by init and thread-copy
 * init: temporary and output frames, the DPB frames (with ThreadFrame
 * wiring), the MD5 context and the DSP tables. On any failure all
 * partial allocations are released via hevc_decode_free() and
 * AVERROR(ENOMEM) is returned. */
2958 static av_cold int hevc_init_context(AVCodecContext *avctx)
2960     HEVCContext *s = avctx->priv_data;
2965     s->tmp_frame = av_frame_alloc();
2969     s->output_frame = av_frame_alloc();
2970     if (!s->output_frame)
2973     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2974         s->DPB[i].frame = av_frame_alloc();
2975         if (!s->DPB[i].frame)
/* ThreadFrame must point at the plain AVFrame for frame threading. */
2977         s->DPB[i].tf.f = s->DPB[i].frame;
/* INT_MAX = "waiting for a random access point" (see decode_nal_unit). */
2980     s->max_ra = INT_MAX;
2982     s->md5_ctx = av_md5_alloc();
2986     ff_dsputil_init(&s->dsp, avctx);
2988     s->context_initialized = 1;
/* (fail label) tear down whatever was allocated before the failure. */
2993     hevc_decode_free(avctx);
2994     return AVERROR(ENOMEM);
/* Frame-threading state sync: copy the source thread's decoder state
 * into this thread — DPB references, cached VPS/SPS/PPS buffers, the
 * active SPS, POC/sequence bookkeeping and NAL framing parameters. */
2997 static int hevc_update_thread_context(AVCodecContext *dst,
2998                                       const AVCodecContext *src)
3000     HEVCContext *s = dst->priv_data;
3001     HEVCContext *s0 = src->priv_data;
3004     if (!s->context_initialized) {
3005         ret = hevc_init_context(dst);
/* Re-reference every occupied DPB slot from the source thread. */
3010     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3011         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3012         if (s0->DPB[i].frame->buf[0]) {
3013             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
/* Parameter sets are refcounted; drop ours and ref the source's. */
3019     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3020         av_buffer_unref(&s->vps_list[i]);
3021         if (s0->vps_list[i]) {
3022             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3023             if (!s->vps_list[i])
3024                 return AVERROR(ENOMEM);
3028     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3029         av_buffer_unref(&s->sps_list[i]);
3030         if (s0->sps_list[i]) {
3031             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3032             if (!s->sps_list[i])
3033                 return AVERROR(ENOMEM);
3037     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3038         av_buffer_unref(&s->pps_list[i]);
3039         if (s0->pps_list[i]) {
3040             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3041             if (!s->pps_list[i])
3042                 return AVERROR(ENOMEM);
/* Re-run SPS-dependent (re)allocation only when the active SPS changed. */
3046     if (s->sps != s0->sps)
3047         ret = set_sps(s, s0->sps);
3049     s->seq_decode = s0->seq_decode;
3050     s->seq_output = s0->seq_output;
3051     s->pocTid0 = s0->pocTid0;
3052     s->max_ra = s0->max_ra;
3054     s->is_nalff = s0->is_nalff;
3055     s->nal_length_size = s0->nal_length_size;
/* (condition elided) after EOS in the source thread: start a new
 * sequence and wait for a fresh random access point. */
3058         s->seq_decode = (s->seq_decode + 1) & 0xff;
3059         s->max_ra = INT_MAX;
/* Parse codec extradata: either an hvcC configuration record (arrays of
 * parameter-set NAL units with 2-byte length prefixes) or raw Annex-B
 * data. Sets s->nal_length_size for subsequent packets in hvcC mode. */
3065 static int hevc_decode_extradata(HEVCContext *s)
3067     AVCodecContext *avctx = s->avctx;
3071     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
/* Heuristic hvcC detection: raw Annex-B starts 00 00 01/00 00 00 01,
 * so any other leading bytes indicate an hvcC record. */
3073     if (avctx->extradata_size > 3 &&
3074         (avctx->extradata[0] || avctx->extradata[1] ||
3075          avctx->extradata[2] > 1)) {
3076         /* It seems the extradata is encoded as hvcC format.
3077          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3078          * is finalized. When finalized, configurationVersion will be 1 and we
3079          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3080         int i, j, num_arrays, nal_len_size;
/* Skip the 21 fixed bytes preceding lengthSizeMinusOne. */
3084         bytestream2_skip(&gb, 21);
3085         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3086         num_arrays = bytestream2_get_byte(&gb);
3088         /* nal units in the hvcC always have length coded with 2 bytes,
3089          * so put a fake nal_length_size = 2 while parsing them */
3090         s->nal_length_size = 2;
3092         /* Decode nal units from hvcC. */
3093         for (i = 0; i < num_arrays; i++) {
3094             int type = bytestream2_get_byte(&gb) & 0x3f;
3095             int cnt = bytestream2_get_be16(&gb);
3097             for (j = 0; j < cnt; j++) {
3098                 // +2 for the nal size field
3099                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3100                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3101                     av_log(s->avctx, AV_LOG_ERROR,
3102                            "Invalid NAL unit size in extradata.\n");
3103                     return AVERROR_INVALIDDATA;
3106                 ret = decode_nal_units(s, gb.buffer, nalsize);
3108                     av_log(avctx, AV_LOG_ERROR,
3109                            "Decoding nal unit %d %d from hvcC failed\n",
3113                 bytestream2_skip(&gb, nalsize);
3117         /* Now store right nal length size, that will be used to parse
3119         s->nal_length_size = nal_len_size;
/* Annex-B extradata: decode it like a regular packet. */
3122         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
/* AVCodec.init: set up CABAC tables and the decoder context, then parse
 * any extradata (parameter sets); frees the context again on extradata
 * failure. */
3129 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3131     HEVCContext *s = avctx->priv_data;
3134     ff_init_cabac_states();
/* Frame threading reports per-frame decode progress. */
3136     avctx->internal->allocate_progress = 1;
3138     ret = hevc_init_context(avctx);
3142     if (avctx->extradata_size > 0 && avctx->extradata) {
3143         ret = hevc_decode_extradata(s);
3145             hevc_decode_free(avctx);
/* Frame-threading per-thread init: start from a zeroed context and
 * allocate the thread's own resources; actual state is copied later by
 * hevc_update_thread_context(). */
3153 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3155     HEVCContext *s = avctx->priv_data;
3158     memset(s, 0, sizeof(*s));
3160     ret = hevc_init_context(avctx);
/* AVCodec.flush (e.g. on seek): drop all DPB frames and require a new
 * random access point before decoding resumes. */
3167 static void hevc_decode_flush(AVCodecContext *avctx)
3169     HEVCContext *s = avctx->priv_data;
3170     ff_hevc_flush_dpb(s);
3171     s->max_ra = INT_MAX;
/* Helpers for the AVOption table below: field offset into HEVCContext
 * and the common option flags. */
3174 #define OFFSET(x) offsetof(HEVCContext, x)
3175 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* Profile id -> human-readable name mapping exposed via
 * AVCodec.profiles; FF_PROFILE_UNKNOWN terminates the list. */
3177 static const AVProfile profiles[] = {
3178     { FF_PROFILE_HEVC_MAIN, "Main" },
3179     { FF_PROFILE_HEVC_MAIN_10, "Main 10" },
3180     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE, "Main Still Picture" },
3181     { FF_PROFILE_UNKNOWN },
/* User-settable decoder options (boolean, default off). */
3184 static const AVOption options[] = {
3185     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3186         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
/* AVClass tying the option table above to the decoder's private
 * context for the AVOptions API and log naming. */
3190 static const AVClass hevc_decoder_class = {
3191     .class_name = "HEVC decoder",
3192     .item_name = av_default_item_name,
3194     .version = LIBAVUTIL_VERSION_INT,
3197 AVCodec ff_hevc_decoder = {
3199 .long_name = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3200 .type = AVMEDIA_TYPE_VIDEO,
3201 .id = AV_CODEC_ID_HEVC,
3202 .priv_data_size = sizeof(HEVCContext),
3203 .priv_class = &hevc_decoder_class,
3204 .init = hevc_decode_init,
3205 .close = hevc_decode_free,
3206 .decode = hevc_decode_frame,
3207 .flush = hevc_decode_flush,
3208 .update_thread_context = hevc_update_thread_context,
3209 .init_thread_copy = hevc_init_thread_copy,
3210 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3211 CODEC_CAP_FRAME_THREADS,
3212 .profiles = NULL_IF_CONFIG_SMALL(profiles),