git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of Libav.
  10  *
  11  * Libav is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * Libav is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with Libav; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/internal.h"
  29 #include "libavutil/md5.h"
  30 #include "libavutil/opt.h"
  31 #include "libavutil/pixdesc.h"
  32
  33 #include "bytestream.h"
  34 #include "cabac_functions.h"
  35 #include "dsputil.h"
  36 #include "golomb.h"
  37 #include "hevc.h"
  38
  39 const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
  40 const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
  41 const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };
  42
  43 static const uint8_t scan_1x1[1] = { 0 };
  44
  45 static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
  46
  47 static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
  48
  49 static const uint8_t horiz_scan4x4_x[16] = {
  50     0, 1, 2, 3,
  51     0, 1, 2, 3,
  52     0, 1, 2, 3,
  53     0, 1, 2, 3,
  54 };
  55
  56 static const uint8_t horiz_scan4x4_y[16] = {
  57     0, 0, 0, 0,
  58     1, 1, 1, 1,
  59     2, 2, 2, 2,
  60     3, 3, 3, 3,
  61 };
  62
  63 static const uint8_t horiz_scan8x8_inv[8][8] = {
  64     {  0,  1,  2,  3, 16, 17, 18, 19, },
  65     {  4,  5,  6,  7, 20, 21, 22, 23, },
  66     {  8,  9, 10, 11, 24, 25, 26, 27, },
  67     { 12, 13, 14, 15, 28, 29, 30, 31, },
  68     { 32, 33, 34, 35, 48, 49, 50, 51, },
  69     { 36, 37, 38, 39, 52, 53, 54, 55, },
  70     { 40, 41, 42, 43, 56, 57, 58, 59, },
  71     { 44, 45, 46, 47, 60, 61, 62, 63, },
  72 };
  73
  74 static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
  75
  76 static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
  77
  78 static const uint8_t diag_scan2x2_inv[2][2] = {
  79     { 0, 2, },
  80     { 1, 3, },
  81 };
  82
  83 const uint8_t ff_hevc_diag_scan4x4_x[16] = {
  84     0, 0, 1, 0,
  85     1, 2, 0, 1,
  86     2, 3, 1, 2,
  87     3, 2, 3, 3,
  88 };
  89
  90 const uint8_t ff_hevc_diag_scan4x4_y[16] = {
  91     0, 1, 0, 2,
  92     1, 0, 3, 2,
  93     1, 0, 3, 2,
  94     1, 3, 2, 3,
  95 };
  96
  97 static const uint8_t diag_scan4x4_inv[4][4] = {
  98     { 0,  2,  5,  9, },
  99     { 1,  4,  8, 12, },
 100     { 3,  7, 11, 14, },
 101     { 6, 10, 13, 15, },
 102 };
 103
 104 const uint8_t ff_hevc_diag_scan8x8_x[64] = {
 105     0, 0, 1, 0,
 106     1, 2, 0, 1,
 107     2, 3, 0, 1,
 108     2, 3, 4, 0,
 109     1, 2, 3, 4,
 110     5, 0, 1, 2,
 111     3, 4, 5, 6,
 112     0, 1, 2, 3,
 113     4, 5, 6, 7,
 114     1, 2, 3, 4,
 115     5, 6, 7, 2,
 116     3, 4, 5, 6,
 117     7, 3, 4, 5,
 118     6, 7, 4, 5,
 119     6, 7, 5, 6,
 120     7, 6, 7, 7,
 121 };
 122
 123 const uint8_t ff_hevc_diag_scan8x8_y[64] = {
 124     0, 1, 0, 2,
 125     1, 0, 3, 2,
 126     1, 0, 4, 3,
 127     2, 1, 0, 5,
 128     4, 3, 2, 1,
 129     0, 6, 5, 4,
 130     3, 2, 1, 0,
 131     7, 6, 5, 4,
 132     3, 2, 1, 0,
 133     7, 6, 5, 4,
 134     3, 2, 1, 7,
 135     6, 5, 4, 3,
 136     2, 7, 6, 5,
 137     4, 3, 7, 6,
 138     5, 4, 7, 6,
 139     5, 7, 6, 7,
 140 };
 141
 142 static const uint8_t diag_scan8x8_inv[8][8] = {
 143     {  0,  2,  5,  9, 14, 20, 27, 35, },
 144     {  1,  4,  8, 13, 19, 26, 34, 42, },
 145     {  3,  7, 12, 18, 25, 33, 41, 48, },
 146     {  6, 11, 17, 24, 32, 40, 47, 53, },
 147     { 10, 16, 23, 31, 39, 46, 52, 57, },
 148     { 15, 22, 30, 38, 45, 51, 56, 60, },
 149     { 21, 29, 37, 44, 50, 55, 59, 62, },
 150     { 28, 36, 43, 49, 54, 58, 61, 63, },
 151 };
 152
 153 /**
  154  * NOTE: Each function hls_foo corresponds to the function foo in the
 155  * specification (HLS stands for High Level Syntax).
 156  */
 157
 158 /**
 159  * Section 5.7
 160  */
 161
 162 /* free everything allocated  by pic_arrays_init() */
 163 static void pic_arrays_free(HEVCContext *s)
 164 {
 165     av_freep(&s->sao);
 166     av_freep(&s->deblock);
 167     av_freep(&s->split_cu_flag);
 168
 169     av_freep(&s->skip_flag);
 170     av_freep(&s->tab_ct_depth);
 171
 172     av_freep(&s->tab_ipm);
 173     av_freep(&s->cbf_luma);
 174     av_freep(&s->is_pcm);
 175
 176     av_freep(&s->qp_y_tab);
 177     av_freep(&s->tab_slice_address);
 178     av_freep(&s->filter_slice_edges);
 179
 180     av_freep(&s->horizontal_bs);
 181     av_freep(&s->vertical_bs);
 182
 183     av_buffer_pool_uninit(&s->tab_mvf_pool);
 184     av_buffer_pool_uninit(&s->rpl_tab_pool);
 185 }
 186
 187 /* allocate arrays that depend on frame dimensions */
 188 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
 189 {
 190     int log2_min_cb_size = sps->log2_min_cb_size;
 191     int width            = sps->width;
 192     int height           = sps->height;
 193     int pic_size         = width * height;
 194     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
 195                            ((height >> log2_min_cb_size) + 1);
 196     int ctb_count        = sps->ctb_width * sps->ctb_height;
 197     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 198
 199     s->bs_width  = width  >> 3;
 200     s->bs_height = height >> 3;
 201
 202     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 203     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 204     s->split_cu_flag = av_malloc(pic_size);
 205     if (!s->sao || !s->deblock || !s->split_cu_flag)
 206         goto fail;
 207
 208     s->skip_flag    = av_malloc(pic_size_in_ctb);
 209     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
 210     if (!s->skip_flag || !s->tab_ct_depth)
 211         goto fail;
 212
 213     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
 214     s->tab_ipm  = av_malloc(min_pu_size);
 215     s->is_pcm   = av_malloc(min_pu_size);
 216     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 217         goto fail;
 218
 219     s->filter_slice_edges = av_malloc(ctb_count);
 220     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
 221                                       sizeof(*s->tab_slice_address));
 222     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
 223                                       sizeof(*s->qp_y_tab));
 224     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 225         goto fail;
 226
 227     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 228     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 229     if (!s->horizontal_bs || !s->vertical_bs)
 230         goto fail;
 231
 232     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 233                                           av_buffer_alloc);
 234     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 235                                           av_buffer_allocz);
 236     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 237         goto fail;
 238
 239     return 0;
 240
 241 fail:
 242     pic_arrays_free(s);
 243     return AVERROR(ENOMEM);
 244 }
 245
 246 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 247 {
 248     int i = 0;
 249     int j = 0;
 250     uint8_t luma_weight_l0_flag[16];
 251     uint8_t chroma_weight_l0_flag[16];
 252     uint8_t luma_weight_l1_flag[16];
 253     uint8_t chroma_weight_l1_flag[16];
 254
 255     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
 256     if (s->sps->chroma_format_idc != 0) {
 257         int delta = get_se_golomb(gb);
 258         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
 259     }
 260
 261     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 262         luma_weight_l0_flag[i] = get_bits1(gb);
 263         if (!luma_weight_l0_flag[i]) {
 264             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 265             s->sh.luma_offset_l0[i] = 0;
 266         }
 267     }
 268     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
 269         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 270             chroma_weight_l0_flag[i] = get_bits1(gb);
 271     } else {
 272         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 273             chroma_weight_l0_flag[i] = 0;
 274     }
 275     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 276         if (luma_weight_l0_flag[i]) {
 277             int delta_luma_weight_l0 = get_se_golomb(gb);
 278             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 279             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 280         }
 281         if (chroma_weight_l0_flag[i]) {
 282             for (j = 0; j < 2; j++) {
 283                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 284                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 285                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 286                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 287                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 288             }
 289         } else {
 290             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 291             s->sh.chroma_offset_l0[i][0] = 0;
 292             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 293             s->sh.chroma_offset_l0[i][1] = 0;
 294         }
 295     }
 296     if (s->sh.slice_type == B_SLICE) {
 297         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 298             luma_weight_l1_flag[i] = get_bits1(gb);
 299             if (!luma_weight_l1_flag[i]) {
 300                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 301                 s->sh.luma_offset_l1[i] = 0;
 302             }
 303         }
 304         if (s->sps->chroma_format_idc != 0) {
 305             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 306                 chroma_weight_l1_flag[i] = get_bits1(gb);
 307         } else {
 308             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 309                 chroma_weight_l1_flag[i] = 0;
 310         }
 311         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 312             if (luma_weight_l1_flag[i]) {
 313                 int delta_luma_weight_l1 = get_se_golomb(gb);
 314                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 315                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 316             }
 317             if (chroma_weight_l1_flag[i]) {
 318                 for (j = 0; j < 2; j++) {
 319                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 320                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 321                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 322                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 323                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 324                 }
 325             } else {
 326                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 327                 s->sh.chroma_offset_l1[i][0] = 0;
 328                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 329                 s->sh.chroma_offset_l1[i][1] = 0;
 330             }
 331         }
 332     }
 333 }
 334
 335 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 336 {
 337     const HEVCSPS *sps = s->sps;
 338     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 339     int prev_delta_msb = 0;
 340     int nb_sps = 0, nb_sh;
 341     int i;
 342
 343     rps->nb_refs = 0;
 344     if (!sps->long_term_ref_pics_present_flag)
 345         return 0;
 346
 347     if (sps->num_long_term_ref_pics_sps > 0)
 348         nb_sps = get_ue_golomb_long(gb);
 349     nb_sh = get_ue_golomb_long(gb);
 350
 351     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
 352         return AVERROR_INVALIDDATA;
 353
 354     rps->nb_refs = nb_sh + nb_sps;
 355
 356     for (i = 0; i < rps->nb_refs; i++) {
 357         uint8_t delta_poc_msb_present;
 358
 359         if (i < nb_sps) {
 360             uint8_t lt_idx_sps = 0;
 361
 362             if (sps->num_long_term_ref_pics_sps > 1)
 363                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 364
 365             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 366             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 367         } else {
 368             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 369             rps->used[i] = get_bits1(gb);
 370         }
 371
 372         delta_poc_msb_present = get_bits1(gb);
 373         if (delta_poc_msb_present) {
 374             int delta = get_ue_golomb_long(gb);
 375
 376             if (i && i != nb_sps)
 377                 delta += prev_delta_msb;
 378
 379             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 380             prev_delta_msb = delta;
 381         }
 382     }
 383
 384     return 0;
 385 }
 386
 387 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 388 {
 389     int ret;
 390
 391     pic_arrays_free(s);
 392     ret = pic_arrays_init(s, sps);
 393     if (ret < 0)
 394         goto fail;
 395
 396     s->avctx->coded_width         = sps->width;
 397     s->avctx->coded_height        = sps->height;
 398     s->avctx->width               = sps->output_width;
 399     s->avctx->height              = sps->output_height;
 400     s->avctx->pix_fmt             = sps->pix_fmt;
 401     s->avctx->sample_aspect_ratio = sps->vui.sar;
 402     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 403
 404     if (sps->vui.video_signal_type_present_flag)
 405         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 406                                                                : AVCOL_RANGE_MPEG;
 407     else
 408         s->avctx->color_range = AVCOL_RANGE_MPEG;
 409
 410     if (sps->vui.colour_description_present_flag) {
 411         s->avctx->color_primaries = sps->vui.colour_primaries;
 412         s->avctx->color_trc       = sps->vui.transfer_characteristic;
 413         s->avctx->colorspace      = sps->vui.matrix_coeffs;
 414     } else {
 415         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 416         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 417         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 418     }
 419
 420     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 421     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 422     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 423
 424     if (sps->sao_enabled) {
 425         av_frame_unref(s->tmp_frame);
 426         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 427         if (ret < 0)
 428             goto fail;
 429         s->frame = s->tmp_frame;
 430     }
 431
 432     s->sps = sps;
 433     s->vps = s->vps_list[s->sps->vps_id];
 434     return 0;
 435
 436 fail:
 437     pic_arrays_free(s);
 438     s->sps = NULL;
 439     return ret;
 440 }
 441
 442 static int hls_slice_header(HEVCContext *s)
 443 {
 444     GetBitContext *gb = &s->HEVClc.gb;
 445     SliceHeader *sh   = &s->sh;
 446     int i, ret;
 447
 448     // Coded parameters
 449     sh->first_slice_in_pic_flag = get_bits1(gb);
 450     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 451         s->seq_decode = (s->seq_decode + 1) & 0xff;
 452         s->max_ra     = INT_MAX;
 453         if (IS_IDR(s))
 454             ff_hevc_clear_refs(s);
 455     }
 456     if (s->nal_unit_type >= 16 && s->nal_unit_type <= 23)
 457         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 458
 459     sh->pps_id = get_ue_golomb_long(gb);
 460     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
 461         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 462         return AVERROR_INVALIDDATA;
 463     }
 464     if (!sh->first_slice_in_pic_flag &&
 465         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
 466         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 467         return AVERROR_INVALIDDATA;
 468     }
 469     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
 470
 471     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
 472         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
 473
 474         ff_hevc_clear_refs(s);
 475         ret = set_sps(s, s->sps);
 476         if (ret < 0)
 477             return ret;
 478
 479         s->seq_decode = (s->seq_decode + 1) & 0xff;
 480         s->max_ra     = INT_MAX;
 481     }
 482
 483     sh->dependent_slice_segment_flag = 0;
 484     if (!sh->first_slice_in_pic_flag) {
 485         int slice_address_length;
 486
 487         if (s->pps->dependent_slice_segments_enabled_flag)
 488             sh->dependent_slice_segment_flag = get_bits1(gb);
 489
 490         slice_address_length = av_ceil_log2(s->sps->ctb_width *
 491                                             s->sps->ctb_height);
 492         sh->slice_segment_addr = get_bits(gb, slice_address_length);
 493         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
 494             av_log(s->avctx, AV_LOG_ERROR,
 495                    "Invalid slice segment address: %u.\n",
 496                    sh->slice_segment_addr);
 497             return AVERROR_INVALIDDATA;
 498         }
 499
 500         if (!sh->dependent_slice_segment_flag) {
 501             sh->slice_addr = sh->slice_segment_addr;
 502             s->slice_idx++;
 503         }
 504     } else {
 505         sh->slice_segment_addr = sh->slice_addr = 0;
 506         s->slice_idx           = 0;
 507         s->slice_initialized   = 0;
 508     }
 509
 510     if (!sh->dependent_slice_segment_flag) {
 511         s->slice_initialized = 0;
 512
 513         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
 514             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 515
 516         sh->slice_type = get_ue_golomb_long(gb);
 517         if (!(sh->slice_type == I_SLICE ||
 518               sh->slice_type == P_SLICE ||
 519               sh->slice_type == B_SLICE)) {
 520             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 521                    sh->slice_type);
 522             return AVERROR_INVALIDDATA;
 523         }
 524         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 525             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 526             return AVERROR_INVALIDDATA;
 527         }
 528
 529         if (s->pps->output_flag_present_flag)
 530             sh->pic_output_flag = get_bits1(gb);
 531
 532         if (s->sps->separate_colour_plane_flag)
 533             sh->colour_plane_id = get_bits(gb, 2);
 534
 535         if (!IS_IDR(s)) {
 536             int short_term_ref_pic_set_sps_flag, poc;
 537
 538             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
 539             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 540             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 541                 av_log(s->avctx, AV_LOG_WARNING,
 542                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 543                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 544                     return AVERROR_INVALIDDATA;
 545                 poc = s->poc;
 546             }
 547             s->poc = poc;
 548
 549             short_term_ref_pic_set_sps_flag = get_bits1(gb);
 550             if (!short_term_ref_pic_set_sps_flag) {
 551                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
 552                 if (ret < 0)
 553                     return ret;
 554
 555                 sh->short_term_rps = &sh->slice_rps;
 556             } else {
 557                 int numbits, rps_idx;
 558
 559                 if (!s->sps->nb_st_rps) {
 560                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 561                     return AVERROR_INVALIDDATA;
 562                 }
 563
 564                 numbits = av_ceil_log2(s->sps->nb_st_rps);
 565                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 566                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
 567             }
 568
 569             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 570             if (ret < 0) {
 571                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 572                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 573                     return AVERROR_INVALIDDATA;
 574             }
 575
 576             if (s->sps->sps_temporal_mvp_enabled_flag)
 577                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 578             else
 579                 sh->slice_temporal_mvp_enabled_flag = 0;
 580         } else {
 581             s->sh.short_term_rps = NULL;
 582             s->poc               = 0;
 583         }
 584
 585         /* 8.3.1 */
 586         if (s->temporal_id == 0 &&
 587             s->nal_unit_type != NAL_TRAIL_N &&
 588             s->nal_unit_type != NAL_TSA_N   &&
 589             s->nal_unit_type != NAL_STSA_N  &&
 590             s->nal_unit_type != NAL_RADL_N  &&
 591             s->nal_unit_type != NAL_RADL_R  &&
 592             s->nal_unit_type != NAL_RASL_N  &&
 593             s->nal_unit_type != NAL_RASL_R)
 594             s->pocTid0 = s->poc;
 595
 596         if (s->sps->sao_enabled) {
 597             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 598             sh->slice_sample_adaptive_offset_flag[1] =
 599             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 600         } else {
 601             sh->slice_sample_adaptive_offset_flag[0] = 0;
 602             sh->slice_sample_adaptive_offset_flag[1] = 0;
 603             sh->slice_sample_adaptive_offset_flag[2] = 0;
 604         }
 605
 606         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 607         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 608             int nb_refs;
 609
 610             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
 611             if (sh->slice_type == B_SLICE)
 612                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
 613
 614             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 615                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 616                 if (sh->slice_type == B_SLICE)
 617                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 618             }
 619             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 620                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 621                        sh->nb_refs[L0], sh->nb_refs[L1]);
 622                 return AVERROR_INVALIDDATA;
 623             }
 624
 625             sh->rpl_modification_flag[0] = 0;
 626             sh->rpl_modification_flag[1] = 0;
 627             nb_refs = ff_hevc_frame_nb_refs(s);
 628             if (!nb_refs) {
 629                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 630                 return AVERROR_INVALIDDATA;
 631             }
 632
 633             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
 634                 sh->rpl_modification_flag[0] = get_bits1(gb);
 635                 if (sh->rpl_modification_flag[0]) {
 636                     for (i = 0; i < sh->nb_refs[L0]; i++)
 637                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 638                 }
 639
 640                 if (sh->slice_type == B_SLICE) {
 641                     sh->rpl_modification_flag[1] = get_bits1(gb);
 642                     if (sh->rpl_modification_flag[1] == 1)
 643                         for (i = 0; i < sh->nb_refs[L1]; i++)
 644                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 645                 }
 646             }
 647
 648             if (sh->slice_type == B_SLICE)
 649                 sh->mvd_l1_zero_flag = get_bits1(gb);
 650
 651             if (s->pps->cabac_init_present_flag)
 652                 sh->cabac_init_flag = get_bits1(gb);
 653             else
 654                 sh->cabac_init_flag = 0;
 655
 656             sh->collocated_ref_idx = 0;
 657             if (sh->slice_temporal_mvp_enabled_flag) {
 658                 sh->collocated_list = L0;
 659                 if (sh->slice_type == B_SLICE)
 660                     sh->collocated_list = !get_bits1(gb);
 661
 662                 if (sh->nb_refs[sh->collocated_list] > 1) {
 663                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 664                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 665                         av_log(s->avctx, AV_LOG_ERROR,
 666                                "Invalid collocated_ref_idx: %d.\n",
 667                                sh->collocated_ref_idx);
 668                         return AVERROR_INVALIDDATA;
 669                     }
 670                 }
 671             }
 672
 673             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 674                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 675                 pred_weight_table(s, gb);
 676             }
 677
 678             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 679             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 680                 av_log(s->avctx, AV_LOG_ERROR,
 681                        "Invalid number of merging MVP candidates: %d.\n",
 682                        sh->max_num_merge_cand);
 683                 return AVERROR_INVALIDDATA;
 684             }
 685         }
 686
 687         sh->slice_qp_delta = get_se_golomb(gb);
 688         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 689             sh->slice_cb_qp_offset = get_se_golomb(gb);
 690             sh->slice_cr_qp_offset = get_se_golomb(gb);
 691         } else {
 692             sh->slice_cb_qp_offset = 0;
 693             sh->slice_cr_qp_offset = 0;
 694         }
 695
 696         if (s->pps->deblocking_filter_control_present_flag) {
 697             int deblocking_filter_override_flag = 0;
 698
 699             if (s->pps->deblocking_filter_override_enabled_flag)
 700                 deblocking_filter_override_flag = get_bits1(gb);
 701
 702             if (deblocking_filter_override_flag) {
 703                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 704                 if (!sh->disable_deblocking_filter_flag) {
 705                     sh->beta_offset = get_se_golomb(gb) * 2;
 706                     sh->tc_offset   = get_se_golomb(gb) * 2;
 707                 }
 708             } else {
 709                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
 710                 sh->beta_offset                    = s->pps->beta_offset;
 711                 sh->tc_offset                      = s->pps->tc_offset;
 712             }
 713         } else {
 714             sh->disable_deblocking_filter_flag = 0;
 715             sh->beta_offset                    = 0;
 716             sh->tc_offset                      = 0;
 717         }
 718
 719         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
 720             (sh->slice_sample_adaptive_offset_flag[0] ||
 721              sh->slice_sample_adaptive_offset_flag[1] ||
 722              !sh->disable_deblocking_filter_flag)) {
 723             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 724         } else {
 725             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
 726         }
 727     } else if (!s->slice_initialized) {
 728         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 729         return AVERROR_INVALIDDATA;
 730     }
 731
 732     sh->num_entry_point_offsets = 0;
 733     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
 734         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 735         if (sh->num_entry_point_offsets > 0) {
 736             int offset_len = get_ue_golomb_long(gb) + 1;
 737
 738             for (i = 0; i < sh->num_entry_point_offsets; i++)
 739                 skip_bits(gb, offset_len);
 740         }
 741     }
 742
 743     if (s->pps->slice_header_extension_present_flag) {
 744         int length = get_ue_golomb_long(gb);
 745         for (i = 0; i < length; i++)
 746             skip_bits(gb, 8);  // slice_header_extension_data_byte
 747     }
 748
 749     // Inferred parameters
 750     sh->slice_qp          = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 751     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 752
 753     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
 754
 755     if (!s->pps->cu_qp_delta_enabled_flag)
 756         s->HEVClc.qp_y = ((s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset) %
 757                           (52 + s->sps->qp_bd_offset)) - s->sps->qp_bd_offset;
 758
 759     s->slice_initialized = 1;
 760
 761     return 0;
 762 }
 763
 764 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
 765
 766 #define SET_SAO(elem, value)                            \
 767 do {                                                    \
 768     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 769         sao->elem = value;                              \
 770     else if (sao_merge_left_flag)                       \
 771         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 772     else if (sao_merge_up_flag)                         \
 773         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 774     else                                                \
 775         sao->elem = 0;                                  \
 776 } while (0)
 777
 778 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 779 {
 780     HEVCLocalContext *lc    = &s->HEVClc;
 781     int sao_merge_left_flag = 0;
 782     int sao_merge_up_flag   = 0;
 783     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
 784     SAOParams *sao          = &CTB(s->sao, rx, ry);
 785     int c_idx, i;
 786
 787     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 788         s->sh.slice_sample_adaptive_offset_flag[1]) {
 789         if (rx > 0) {
 790             if (lc->ctb_left_flag)
 791                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 792         }
 793         if (ry > 0 && !sao_merge_left_flag) {
 794             if (lc->ctb_up_flag)
 795                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 796         }
 797     }
 798
 799     for (c_idx = 0; c_idx < 3; c_idx++) {
 800         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 801             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 802             continue;
 803         }
 804
 805         if (c_idx == 2) {
 806             sao->type_idx[2] = sao->type_idx[1];
 807             sao->eo_class[2] = sao->eo_class[1];
 808         } else {
 809             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 810         }
 811
 812         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 813             continue;
 814
 815         for (i = 0; i < 4; i++)
 816             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 817
 818         if (sao->type_idx[c_idx] == SAO_BAND) {
 819             for (i = 0; i < 4; i++) {
 820                 if (sao->offset_abs[c_idx][i]) {
 821                     SET_SAO(offset_sign[c_idx][i],
 822                             ff_hevc_sao_offset_sign_decode(s));
 823                 } else {
 824                     sao->offset_sign[c_idx][i] = 0;
 825                 }
 826             }
 827             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 828         } else if (c_idx != 2) {
 829             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 830         }
 831
 832         // Inferred parameters
 833         sao->offset_val[c_idx][0] = 0;
 834         for (i = 0; i < 4; i++) {
 835             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
 836             if (sao->type_idx[c_idx] == SAO_EDGE) {
 837                 if (i > 1)
 838                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 839             } else if (sao->offset_sign[c_idx][i]) {
 840                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 841             }
 842         }
 843     }
 844 }
 845
 846 #undef SET_SAO
 847 #undef CTB
 848
 849 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
 850                                 int log2_trafo_size, enum ScanType scan_idx,
 851                                 int c_idx)
 852 {
 853 #define GET_COORD(offset, n)                                    \
 854     do {                                                        \
 855         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
 856         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
 857     } while (0)
 858     HEVCLocalContext *lc    = &s->HEVClc;
 859     int transform_skip_flag = 0;
 860
 861     int last_significant_coeff_x, last_significant_coeff_y;
 862     int last_scan_pos;
 863     int n_end;
 864     int num_coeff    = 0;
 865     int greater1_ctx = 1;
 866
 867     int num_last_subset;
 868     int x_cg_last_sig, y_cg_last_sig;
 869
 870     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 871
 872     ptrdiff_t stride = s->frame->linesize[c_idx];
 873     int hshift       = s->sps->hshift[c_idx];
 874     int vshift       = s->sps->vshift[c_idx];
 875     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
 876                                               ((x0 >> hshift) << s->sps->pixel_shift)];
 877     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
 878     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
 879
 880     int trafo_size = 1 << log2_trafo_size;
 881     int i, qp, shift, add, scale, scale_m;
 882     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
 883     const uint8_t *scale_matrix;
 884     uint8_t dc_scale;
 885
 886     // Derive QP for dequant
 887     if (!lc->cu.cu_transquant_bypass_flag) {
 888         static const int qp_c[] = {
 889             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
 890         };
 891
 892         static const uint8_t rem6[51 + 2 * 6 + 1] = {
 893             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
 894             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
 895             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 896         };
 897
 898         static const uint8_t div6[51 + 2 * 6 + 1] = {
 899             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
 900             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
 901             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
 902         };
 903         int qp_y = lc->qp_y;
 904
 905         if (c_idx == 0) {
 906             qp = qp_y + s->sps->qp_bd_offset;
 907         } else {
 908             int qp_i, offset;
 909
 910             if (c_idx == 1)
 911                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
 912             else
 913                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
 914
 915             qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
 916             if (qp_i < 30)
 917                 qp = qp_i;
 918             else if (qp_i > 43)
 919                 qp = qp_i - 6;
 920             else
 921                 qp = qp_c[qp_i - 30];
 922
 923             qp += s->sps->qp_bd_offset;
 924         }
 925
 926         shift    = s->sps->bit_depth + log2_trafo_size - 5;
 927         add      = 1 << (shift - 1);
 928         scale    = level_scale[rem6[qp]] << (div6[qp]);
 929         scale_m  = 16; // default when no custom scaling lists.
 930         dc_scale = 16;
 931
 932         if (s->sps->scaling_list_enable_flag) {
 933             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
 934                                     &s->pps->scaling_list : &s->sps->scaling_list;
 935             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 936
 937             if (log2_trafo_size != 5)
 938                 matrix_id = 3 * matrix_id + c_idx;
 939
 940             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
 941             if (log2_trafo_size >= 4)
 942                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
 943         }
 944     }
 945
 946     if (s->pps->transform_skip_enabled_flag &&
 947         !lc->cu.cu_transquant_bypass_flag   &&
 948         log2_trafo_size == 2) {
 949         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
 950     }
 951
 952     last_significant_coeff_x =
 953         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
 954     last_significant_coeff_y =
 955         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
 956
 957     if (last_significant_coeff_x > 3) {
 958         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
 959         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
 960                                    (2 + (last_significant_coeff_x & 1)) +
 961                                    suffix;
 962     }
 963
 964     if (last_significant_coeff_y > 3) {
 965         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
 966         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
 967                                    (2 + (last_significant_coeff_y & 1)) +
 968                                    suffix;
 969     }
 970
 971     if (scan_idx == SCAN_VERT)
 972         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
 973
 974     x_cg_last_sig = last_significant_coeff_x >> 2;
 975     y_cg_last_sig = last_significant_coeff_y >> 2;
 976
 977     switch (scan_idx) {
 978     case SCAN_DIAG: {
 979         int last_x_c = last_significant_coeff_x & 3;
 980         int last_y_c = last_significant_coeff_y & 3;
 981
 982         scan_x_off = ff_hevc_diag_scan4x4_x;
 983         scan_y_off = ff_hevc_diag_scan4x4_y;
 984         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
 985         if (trafo_size == 4) {
 986             scan_x_cg = scan_1x1;
 987             scan_y_cg = scan_1x1;
 988         } else if (trafo_size == 8) {
 989             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
 990             scan_x_cg  = diag_scan2x2_x;
 991             scan_y_cg  = diag_scan2x2_y;
 992         } else if (trafo_size == 16) {
 993             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
 994             scan_x_cg  = ff_hevc_diag_scan4x4_x;
 995             scan_y_cg  = ff_hevc_diag_scan4x4_y;
 996         } else { // trafo_size == 32
 997             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
 998             scan_x_cg  = ff_hevc_diag_scan8x8_x;
 999             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1000         }
1001         break;
1002     }
1003     case SCAN_HORIZ:
1004         scan_x_cg  = horiz_scan2x2_x;
1005         scan_y_cg  = horiz_scan2x2_y;
1006         scan_x_off = horiz_scan4x4_x;
1007         scan_y_off = horiz_scan4x4_y;
1008         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1009         break;
1010     default: //SCAN_VERT
1011         scan_x_cg  = horiz_scan2x2_y;
1012         scan_y_cg  = horiz_scan2x2_x;
1013         scan_x_off = horiz_scan4x4_y;
1014         scan_y_off = horiz_scan4x4_x;
1015         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1016         break;
1017     }
1018     num_coeff++;
1019     num_last_subset = (num_coeff - 1) >> 4;
1020
1021     for (i = num_last_subset; i >= 0; i--) {
1022         int n, m;
1023         int x_cg, y_cg, x_c, y_c;
1024         int implicit_non_zero_coeff = 0;
1025         int64_t trans_coeff_level;
1026         int prev_sig = 0;
1027         int offset   = i << 4;
1028
1029         uint8_t significant_coeff_flag_idx[16];
1030         uint8_t nb_significant_coeff_flag = 0;
1031
1032         x_cg = scan_x_cg[i];
1033         y_cg = scan_y_cg[i];
1034
1035         if (i < num_last_subset && i > 0) {
1036             int ctx_cg = 0;
1037             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1038                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1039             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1040                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1041
1042             significant_coeff_group_flag[x_cg][y_cg] =
1043                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1044             implicit_non_zero_coeff = 1;
1045         } else {
1046             significant_coeff_group_flag[x_cg][y_cg] =
1047                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1048                  (x_cg == 0 && y_cg == 0));
1049         }
1050
1051         last_scan_pos = num_coeff - offset - 1;
1052
1053         if (i == num_last_subset) {
1054             n_end                         = last_scan_pos - 1;
1055             significant_coeff_flag_idx[0] = last_scan_pos;
1056             nb_significant_coeff_flag     = 1;
1057         } else {
1058             n_end = 15;
1059         }
1060
1061         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1062             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1063         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1064             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1065
1066         for (n = n_end; n >= 0; n--) {
1067             GET_COORD(offset, n);
1068
1069             if (significant_coeff_group_flag[x_cg][y_cg] &&
1070                 (n > 0 || implicit_non_zero_coeff == 0)) {
1071                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1072                                                           log2_trafo_size,
1073                                                           scan_idx,
1074                                                           prev_sig) == 1) {
1075                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1076                     nb_significant_coeff_flag++;
1077                     implicit_non_zero_coeff = 0;
1078                 }
1079             } else {
1080                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1081                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1082                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1083                     nb_significant_coeff_flag++;
1084                 }
1085             }
1086         }
1087
1088         n_end = nb_significant_coeff_flag;
1089
1090         if (n_end) {
1091             int first_nz_pos_in_cg = 16;
1092             int last_nz_pos_in_cg = -1;
1093             int c_rice_param = 0;
1094             int first_greater1_coeff_idx = -1;
1095             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1096             uint16_t coeff_sign_flag;
1097             int sum_abs = 0;
1098             int sign_hidden = 0;
1099
1100             // initialize first elem of coeff_bas_level_greater1_flag
1101             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1102
1103             if (!(i == num_last_subset) && greater1_ctx == 0)
1104                 ctx_set++;
1105             greater1_ctx      = 1;
1106             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1107
1108             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1109                 int n_idx = significant_coeff_flag_idx[m];
1110                 int inc   = (ctx_set << 2) + greater1_ctx;
1111                 coeff_abs_level_greater1_flag[n_idx] =
1112                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1113                 if (coeff_abs_level_greater1_flag[n_idx]) {
1114                     greater1_ctx = 0;
1115                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1116                     greater1_ctx++;
1117                 }
1118
1119                 if (coeff_abs_level_greater1_flag[n_idx] &&
1120                     first_greater1_coeff_idx == -1)
1121                     first_greater1_coeff_idx = n_idx;
1122             }
1123             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1124             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1125                                  !lc->cu.cu_transquant_bypass_flag;
1126
1127             if (first_greater1_coeff_idx != -1) {
1128                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1129             }
1130             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1131                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1132             } else {
1133                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1134             }
1135
1136             for (m = 0; m < n_end; m++) {
1137                 n = significant_coeff_flag_idx[m];
1138                 GET_COORD(offset, n);
1139                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1140                 if (trans_coeff_level == ((m < 8) ?
1141                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1142                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1143
1144                     trans_coeff_level += last_coeff_abs_level_remaining;
1145                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1146                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1147                 }
1148                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1149                     sum_abs += trans_coeff_level;
1150                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1151                         trans_coeff_level = -trans_coeff_level;
1152                 }
1153                 if (coeff_sign_flag >> 15)
1154                     trans_coeff_level = -trans_coeff_level;
1155                 coeff_sign_flag <<= 1;
1156                 if (!lc->cu.cu_transquant_bypass_flag) {
1157                     if (s->sps->scaling_list_enable_flag) {
1158                         if (y_c || x_c || log2_trafo_size < 4) {
1159                             int pos;
1160                             switch (log2_trafo_size) {
1161                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1162                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1163                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1164                             default: pos = (y_c        << 2) +  x_c;
1165                             }
1166                             scale_m = scale_matrix[pos];
1167                         } else {
1168                             scale_m = dc_scale;
1169                         }
1170                     }
1171                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1172                     if(trans_coeff_level < 0) {
1173                         if((~trans_coeff_level) & 0xFffffffffff8000)
1174                             trans_coeff_level = -32768;
1175                     } else {
1176                         if (trans_coeff_level & 0xffffffffffff8000)
1177                             trans_coeff_level = 32767;
1178                     }
1179                 }
1180                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1181             }
1182         }
1183     }
1184
1185     if (lc->cu.cu_transquant_bypass_flag) {
1186         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1187     } else {
1188         if (transform_skip_flag)
1189             s->hevcdsp.transform_skip(dst, coeffs, stride);
1190         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1191                  log2_trafo_size == 2)
1192             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1193         else
1194             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1195     }
1196 }
1197
1198 static void hls_transform_unit(HEVCContext *s, int x0, int y0,
1199                                int xBase, int yBase, int cb_xBase, int cb_yBase,
1200                                int log2_cb_size, int log2_trafo_size,
1201                                int trafo_depth, int blk_idx)
1202 {
1203     HEVCLocalContext *lc = &s->HEVClc;
1204
1205     if (lc->cu.pred_mode == MODE_INTRA) {
1206         int trafo_size = 1 << log2_trafo_size;
1207         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1208
1209         s->hpc.intra_pred(s, x0, y0, log2_trafo_size, 0);
1210         if (log2_trafo_size > 2) {
1211             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1212             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1213             s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 1);
1214             s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 2);
1215         } else if (blk_idx == 3) {
1216             trafo_size = trafo_size << s->sps->hshift[1];
1217             ff_hevc_set_neighbour_available(s, xBase, yBase,
1218                                             trafo_size, trafo_size);
1219             s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 1);
1220             s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 2);
1221         }
1222     }
1223
1224     if (lc->tt.cbf_luma ||
1225         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1226         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1227         int scan_idx   = SCAN_DIAG;
1228         int scan_idx_c = SCAN_DIAG;
1229
1230         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1231             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1232             if (lc->tu.cu_qp_delta != 0)
1233                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1234                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1235             lc->tu.is_cu_qp_delta_coded = 1;
1236             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1237         }
1238
1239         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1240             if (lc->tu.cur_intra_pred_mode >= 6 &&
1241                 lc->tu.cur_intra_pred_mode <= 14) {
1242                 scan_idx = SCAN_VERT;
1243             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1244                        lc->tu.cur_intra_pred_mode <= 30) {
1245                 scan_idx = SCAN_HORIZ;
1246             }
1247
1248             if (lc->pu.intra_pred_mode_c >=  6 &&
1249                 lc->pu.intra_pred_mode_c <= 14) {
1250                 scan_idx_c = SCAN_VERT;
1251             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1252                        lc->pu.intra_pred_mode_c <= 30) {
1253                 scan_idx_c = SCAN_HORIZ;
1254             }
1255         }
1256
1257         if (lc->tt.cbf_luma)
1258             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1259         if (log2_trafo_size > 2) {
1260             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0))
1261                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1262             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0))
1263                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1264         } else if (blk_idx == 3) {
1265             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase))
1266                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1267             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase))
1268                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1269         }
1270     }
1271 }
1272
1273 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1274 {
1275     int cb_size          = 1 << log2_cb_size;
1276     int log2_min_pu_size = s->sps->log2_min_pu_size;
1277
1278     int min_pu_width     = s->sps->min_pu_width;
1279     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1280     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1281     int i, j;
1282
1283     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1284         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1285             s->is_pcm[i + j * min_pu_width] = 2;
1286 }
1287
1288 static void hls_transform_tree(HEVCContext *s, int x0, int y0,
1289                                int xBase, int yBase, int cb_xBase, int cb_yBase,
1290                                int log2_cb_size, int log2_trafo_size,
1291                                int trafo_depth, int blk_idx)
1292 {
1293     HEVCLocalContext *lc = &s->HEVClc;
1294     uint8_t split_transform_flag;
1295
1296     if (trafo_depth > 0 && log2_trafo_size == 2) {
1297         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1298             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1299         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1300             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
1301     } else {
1302         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1303         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
1304     }
1305
1306     if (lc->cu.intra_split_flag) {
1307         if (trafo_depth == 1)
1308             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1309     } else {
1310         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1311     }
1312
1313     lc->tt.cbf_luma = 1;
1314
1315     lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1316                               lc->cu.pred_mode == MODE_INTER &&
1317                               lc->cu.part_mode != PART_2Nx2N &&
1318                               trafo_depth == 0;
1319
1320     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1321         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1322         trafo_depth     < lc->cu.max_trafo_depth       &&
1323         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1324         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1325     } else {
1326         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1327                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1328                                lc->tt.inter_split_flag;
1329     }
1330
1331     if (log2_trafo_size > 2) {
1332         if (trafo_depth == 0 ||
1333             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1334             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1335                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1336         }
1337
1338         if (trafo_depth == 0 ||
1339             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1340             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1341                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1342         }
1343     }
1344
1345     if (split_transform_flag) {
1346         int x1 = x0 + ((1 << log2_trafo_size) >> 1);
1347         int y1 = y0 + ((1 << log2_trafo_size) >> 1);
1348
1349         hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1350                            log2_trafo_size - 1, trafo_depth + 1, 0);
1351         hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1352                            log2_trafo_size - 1, trafo_depth + 1, 1);
1353         hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1354                            log2_trafo_size - 1, trafo_depth + 1, 2);
1355         hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1356                            log2_trafo_size - 1, trafo_depth + 1, 3);
1357     } else {
1358         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1359         int log2_min_tu_size = s->sps->log2_min_tb_size;
1360         int min_tu_width     = s->sps->min_tb_width;
1361
1362         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1363             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1364             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1365             lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1366         }
1367
1368         hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1369                            log2_cb_size, log2_trafo_size, trafo_depth, blk_idx);
1370
1371         // TODO: store cbf_luma somewhere else
1372         if (lc->tt.cbf_luma) {
1373             int i, j;
1374             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1375                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1376                     int x_tu = (x0 + j) >> log2_min_tu_size;
1377                     int y_tu = (y0 + i) >> log2_min_tu_size;
1378                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1379                 }
1380         }
1381         if (!s->sh.disable_deblocking_filter_flag) {
1382             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
1383                                                   lc->slice_or_tiles_up_boundary,
1384                                                   lc->slice_or_tiles_left_boundary);
1385             if (s->pps->transquant_bypass_enable_flag &&
1386                 lc->cu.cu_transquant_bypass_flag)
1387                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1388         }
1389     }
1390 }
1391
1392 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1393 {
1394     //TODO: non-4:2:0 support
1395     HEVCLocalContext *lc = &s->HEVClc;
1396     GetBitContext gb;
1397     int cb_size   = 1 << log2_cb_size;
1398     int stride0   = s->frame->linesize[0];
1399     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1400     int   stride1 = s->frame->linesize[1];
1401     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1402     int   stride2 = s->frame->linesize[2];
1403     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1404
1405     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth;
1406     const uint8_t *pcm = skip_bytes(&s->HEVClc.cc, (length + 7) >> 3);
1407     int ret;
1408
1409     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1410                                           lc->slice_or_tiles_up_boundary,
1411                                           lc->slice_or_tiles_left_boundary);
1412
1413     ret = init_get_bits(&gb, pcm, length);
1414     if (ret < 0)
1415         return ret;
1416
1417     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1418     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth);
1419     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth);
1420     return 0;
1421 }
1422
1423 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1424 {
1425     HEVCLocalContext *lc = &s->HEVClc;
1426     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1427     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1428
1429     if (x)
1430         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1431     if (y)
1432         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1433
1434     switch (x) {
1435     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1436     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1437     case 0: lc->pu.mvd.x = 0;                               break;
1438     }
1439
1440     switch (y) {
1441     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1442     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1443     case 0: lc->pu.mvd.y = 0;                               break;
1444     }
1445 }
1446
1447 /**
1448  * 8.5.3.2.2.1 Luma sample interpolation process
1449  *
1450  * @param s HEVC decoding context
1451  * @param dst target buffer for block data at block position
1452  * @param dststride stride of the dst buffer
1453  * @param ref reference picture buffer at origin (0, 0)
1454  * @param mv motion vector (relative to block position) to get pixel data from
1455  * @param x_off horizontal position of block from origin (0, 0)
1456  * @param y_off vertical position of block from origin (0, 0)
1457  * @param block_w width of block
1458  * @param block_h height of block
1459  */
1460 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1461                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1462                     int block_w, int block_h)
1463 {
1464     HEVCLocalContext *lc = &s->HEVClc;
1465     uint8_t *src         = ref->data[0];
1466     ptrdiff_t srcstride  = ref->linesize[0];
1467     int pic_width        = s->sps->width;
1468     int pic_height       = s->sps->height;
1469
1470     int mx         = mv->x & 3;
1471     int my         = mv->y & 3;
1472     int extra_left = ff_hevc_qpel_extra_before[mx];
1473     int extra_top  = ff_hevc_qpel_extra_before[my];
1474
1475     x_off += mv->x >> 2;
1476     y_off += mv->y >> 2;
1477     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1478
1479     if (x_off < extra_left || y_off < extra_top ||
1480         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1481         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1482         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1483
1484         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset, srcstride,
1485                                  block_w + ff_hevc_qpel_extra[mx],
1486                                  block_h + ff_hevc_qpel_extra[my],
1487                                  x_off - extra_left, y_off - extra_top,
1488                                  pic_width, pic_height);
1489         src = lc->edge_emu_buffer + offset;
1490     }
1491     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1492                                      block_h, lc->mc_buffer);
1493 }
1494
1495 /**
1496  * 8.5.3.2.2.2 Chroma sample interpolation process
1497  *
1498  * @param s HEVC decoding context
1499  * @param dst1 target buffer for block data at block position (U plane)
1500  * @param dst2 target buffer for block data at block position (V plane)
1501  * @param dststride stride of the dst1 and dst2 buffers
1502  * @param ref reference picture buffer at origin (0, 0)
1503  * @param mv motion vector (relative to block position) to get pixel data from
1504  * @param x_off horizontal position of block from origin (0, 0)
1505  * @param y_off vertical position of block from origin (0, 0)
1506  * @param block_w width of block
1507  * @param block_h height of block
1508  */
1509 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1510                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1511                       int x_off, int y_off, int block_w, int block_h)
1512 {
1513     HEVCLocalContext *lc = &s->HEVClc;
1514     uint8_t *src1        = ref->data[1];
1515     uint8_t *src2        = ref->data[2];
1516     ptrdiff_t src1stride = ref->linesize[1];
1517     ptrdiff_t src2stride = ref->linesize[2];
1518     int pic_width        = s->sps->width >> 1;
1519     int pic_height       = s->sps->height >> 1;
1520
1521     int mx = mv->x & 7;
1522     int my = mv->y & 7;
1523
1524     x_off += mv->x >> 3;
1525     y_off += mv->y >> 3;
1526     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1527     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1528
1529     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1530         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1531         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1532         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1533         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1534
1535         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1, src1stride,
1536                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1537                                  x_off - EPEL_EXTRA_BEFORE,
1538                                  y_off - EPEL_EXTRA_BEFORE,
1539                                  pic_width, pic_height);
1540
1541         src1 = lc->edge_emu_buffer + offset1;
1542         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1543                                              block_w, block_h, mx, my, lc->mc_buffer);
1544
1545         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2, src2stride,
1546                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1547                                  x_off - EPEL_EXTRA_BEFORE,
1548                                  y_off - EPEL_EXTRA_BEFORE,
1549                                  pic_width, pic_height);
1550         src2 = lc->edge_emu_buffer + offset2;
1551         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1552                                              block_w, block_h, mx, my,
1553                                              lc->mc_buffer);
1554     } else {
1555         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1556                                              block_w, block_h, mx, my,
1557                                              lc->mc_buffer);
1558         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1559                                              block_w, block_h, mx, my,
1560                                              lc->mc_buffer);
1561     }
1562 }
1563
1564 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1565                                 const Mv *mv, int y0, int height)
1566 {
1567     int y = (mv->y >> 2) + y0 + height + 9;
1568     ff_thread_await_progress(&ref->tf, y, 0);
1569 }
1570
1571 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1572                                 int nPbW, int nPbH,
1573                                 int log2_cb_size, int partIdx)
1574 {
1575 #define POS(c_idx, x, y)                                                              \
1576     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1577                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1578     HEVCLocalContext *lc = &s->HEVClc;
1579     int merge_idx = 0;
1580     struct MvField current_mv = {{{ 0 }}};
1581
1582     int min_pu_width = s->sps->min_pu_width;
1583
1584     MvField *tab_mvf = s->ref->tab_mvf;
1585     RefPicList  *refPicList = s->ref->refPicList;
1586     HEVCFrame *ref0, *ref1;
1587
1588     int tmpstride = MAX_PB_SIZE;
1589
1590     uint8_t *dst0 = POS(0, x0, y0);
1591     uint8_t *dst1 = POS(1, x0, y0);
1592     uint8_t *dst2 = POS(2, x0, y0);
1593     int log2_min_cb_size = s->sps->log2_min_cb_size;
1594     int min_cb_width     = s->sps->min_cb_width;
1595     int x_cb             = x0 >> log2_min_cb_size;
1596     int y_cb             = y0 >> log2_min_cb_size;
1597     int ref_idx[2];
1598     int mvp_flag[2];
1599     int x_pu, y_pu;
1600     int i, j;
1601
1602     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1603         if (s->sh.max_num_merge_cand > 1)
1604             merge_idx = ff_hevc_merge_idx_decode(s);
1605         else
1606             merge_idx = 0;
1607
1608         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1609                                    1 << log2_cb_size,
1610                                    1 << log2_cb_size,
1611                                    log2_cb_size, partIdx,
1612                                    merge_idx, &current_mv);
1613         x_pu = x0 >> s->sps->log2_min_pu_size;
1614         y_pu = y0 >> s->sps->log2_min_pu_size;
1615
1616         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1617             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1618                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1619     } else { /* MODE_INTER */
1620         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1621         if (lc->pu.merge_flag) {
1622             if (s->sh.max_num_merge_cand > 1)
1623                 merge_idx = ff_hevc_merge_idx_decode(s);
1624             else
1625                 merge_idx = 0;
1626
1627             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1628                                        partIdx, merge_idx, &current_mv);
1629             x_pu = x0 >> s->sps->log2_min_pu_size;
1630             y_pu = y0 >> s->sps->log2_min_pu_size;
1631
1632             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1633                 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1634                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1635         } else {
1636             enum InterPredIdc inter_pred_idc = PRED_L0;
1637             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1638             if (s->sh.slice_type == B_SLICE)
1639                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1640
1641             if (inter_pred_idc != PRED_L1) {
1642                 if (s->sh.nb_refs[L0]) {
1643                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1644                     current_mv.ref_idx[0] = ref_idx[0];
1645                 }
1646                 current_mv.pred_flag[0] = 1;
1647                 hls_mvd_coding(s, x0, y0, 0);
1648                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1649                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1650                                          partIdx, merge_idx, &current_mv,
1651                                          mvp_flag[0], 0);
1652                 current_mv.mv[0].x += lc->pu.mvd.x;
1653                 current_mv.mv[0].y += lc->pu.mvd.y;
1654             }
1655
1656             if (inter_pred_idc != PRED_L0) {
1657                 if (s->sh.nb_refs[L1]) {
1658                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1659                     current_mv.ref_idx[1] = ref_idx[1];
1660                 }
1661
1662                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1663                     lc->pu.mvd.x = 0;
1664                     lc->pu.mvd.y = 0;
1665                 } else {
1666                     hls_mvd_coding(s, x0, y0, 1);
1667                 }
1668
1669                 current_mv.pred_flag[1] = 1;
1670                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1671                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1672                                          partIdx, merge_idx, &current_mv,
1673                                          mvp_flag[1], 1);
1674                 current_mv.mv[1].x += lc->pu.mvd.x;
1675                 current_mv.mv[1].y += lc->pu.mvd.y;
1676             }
1677
1678             x_pu = x0 >> s->sps->log2_min_pu_size;
1679             y_pu = y0 >> s->sps->log2_min_pu_size;
1680
1681             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1682                 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1683                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1684         }
1685     }
1686
1687     if (current_mv.pred_flag[0]) {
1688         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1689         if (!ref0)
1690             return;
1691         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1692     }
1693     if (current_mv.pred_flag[1]) {
1694         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1695         if (!ref1)
1696             return;
1697         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1698     }
1699
1700     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1701         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1702         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1703
1704         luma_mc(s, tmp, tmpstride, ref0->frame,
1705                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1706
1707         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1708             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1709             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1710                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1711                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1712                                      dst0, s->frame->linesize[0], tmp,
1713                                      tmpstride, nPbW, nPbH);
1714         } else {
1715             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1716         }
1717         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1718                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1719
1720         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1721             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1722             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1723                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1724                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1725                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1726                                      nPbW / 2, nPbH / 2);
1727             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1728                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1729                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1730                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1731                                      nPbW / 2, nPbH / 2);
1732         } else {
1733             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1734             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1735         }
1736     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1737         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1738         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1739
1740         if (!ref1)
1741             return;
1742
1743         luma_mc(s, tmp, tmpstride, ref1->frame,
1744                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1745
1746         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1747             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1748             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1749                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1750                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1751                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1752                                       nPbW, nPbH);
1753         } else {
1754             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1755         }
1756
1757         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1758                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1759
1760         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1761             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1762             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1763                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1764                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1765                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1766             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1767                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1768                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1769                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1770         } else {
1771             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1772             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1773         }
1774     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1775         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1776         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1777         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1778         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1779         HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1780         HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1781
1782         if (!ref0 || !ref1)
1783             return;
1784
1785         luma_mc(s, tmp, tmpstride, ref0->frame,
1786                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1787         luma_mc(s, tmp2, tmpstride, ref1->frame,
1788                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1789
1790         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1791             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1792             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1793                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1794                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1795                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1796                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1797                                          dst0, s->frame->linesize[0],
1798                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1799         } else {
1800             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1801                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1802         }
1803
1804         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1805                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1806         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1807                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1808
1809         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1810             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1811             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1812                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1813                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1814                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1815                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1816                                          dst1, s->frame->linesize[1], tmp, tmp3,
1817                                          tmpstride, nPbW / 2, nPbH / 2);
1818             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1819                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1820                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1821                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1822                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1823                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1824                                          tmpstride, nPbW / 2, nPbH / 2);
1825         } else {
1826             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1827             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1828         }
1829     }
1830 }
1831
1832 /**
1833  * 8.4.1
1834  */
1835 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1836                                 int prev_intra_luma_pred_flag)
1837 {
1838     HEVCLocalContext *lc = &s->HEVClc;
1839     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1840     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1841     int min_pu_width     = s->sps->min_pu_width;
1842     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1843     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1844     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1845
1846     int cand_up   = (lc->ctb_up_flag || y0b) ?
1847                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1848     int cand_left = (lc->ctb_left_flag || x0b) ?
1849                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1850
1851     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1852
1853     MvField *tab_mvf = s->ref->tab_mvf;
1854     int intra_pred_mode;
1855     int candidate[3];
1856     int i, j;
1857
1858     // intra_pred_mode prediction does not cross vertical CTB boundaries
1859     if ((y0 - 1) < y_ctb)
1860         cand_up = INTRA_DC;
1861
1862     if (cand_left == cand_up) {
1863         if (cand_left < 2) {
1864             candidate[0] = INTRA_PLANAR;
1865             candidate[1] = INTRA_DC;
1866             candidate[2] = INTRA_ANGULAR_26;
1867         } else {
1868             candidate[0] = cand_left;
1869             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1870             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1871         }
1872     } else {
1873         candidate[0] = cand_left;
1874         candidate[1] = cand_up;
1875         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1876             candidate[2] = INTRA_PLANAR;
1877         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1878             candidate[2] = INTRA_DC;
1879         } else {
1880             candidate[2] = INTRA_ANGULAR_26;
1881         }
1882     }
1883
1884     if (prev_intra_luma_pred_flag) {
1885         intra_pred_mode = candidate[lc->pu.mpm_idx];
1886     } else {
1887         if (candidate[0] > candidate[1])
1888             FFSWAP(uint8_t, candidate[0], candidate[1]);
1889         if (candidate[0] > candidate[2])
1890             FFSWAP(uint8_t, candidate[0], candidate[2]);
1891         if (candidate[1] > candidate[2])
1892             FFSWAP(uint8_t, candidate[1], candidate[2]);
1893
1894         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1895         for (i = 0; i < 3; i++)
1896             if (intra_pred_mode >= candidate[i])
1897                 intra_pred_mode++;
1898     }
1899
1900     /* write the intra prediction units into the mv array */
1901     if (!size_in_pus)
1902         size_in_pus = 1;
1903     for (i = 0; i < size_in_pus; i++) {
1904         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1905                intra_pred_mode, size_in_pus);
1906
1907         for (j = 0; j < size_in_pus; j++) {
1908             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1909             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1910             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1911             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1912             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1913             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1914             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1915             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1916             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1917         }
1918     }
1919
1920     return intra_pred_mode;
1921 }
1922
1923 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1924                                           int log2_cb_size, int ct_depth)
1925 {
1926     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1927     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1928     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1929     int y;
1930
1931     for (y = 0; y < length; y++)
1932         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1933                ct_depth, length);
1934 }
1935
1936 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1937                                   int log2_cb_size)
1938 {
1939     HEVCLocalContext *lc = &s->HEVClc;
1940     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1941     uint8_t prev_intra_luma_pred_flag[4];
1942     int split   = lc->cu.part_mode == PART_NxN;
1943     int pb_size = (1 << log2_cb_size) >> split;
1944     int side    = split + 1;
1945     int chroma_mode;
1946     int i, j;
1947
1948     for (i = 0; i < side; i++)
1949         for (j = 0; j < side; j++)
1950             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1951
1952     for (i = 0; i < side; i++) {
1953         for (j = 0; j < side; j++) {
1954             if (prev_intra_luma_pred_flag[2 * i + j])
1955                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1956             else
1957                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1958
1959             lc->pu.intra_pred_mode[2 * i + j] =
1960                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1961                                      prev_intra_luma_pred_flag[2 * i + j]);
1962         }
1963     }
1964
1965     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1966     if (chroma_mode != 4) {
1967         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1968             lc->pu.intra_pred_mode_c = 34;
1969         else
1970             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
1971     } else {
1972         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
1973     }
1974 }
1975
1976 static void intra_prediction_unit_default_value(HEVCContext *s,
1977                                                 int x0, int y0,
1978                                                 int log2_cb_size)
1979 {
1980     HEVCLocalContext *lc = &s->HEVClc;
1981     int pb_size          = 1 << log2_cb_size;
1982     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
1983     int min_pu_width     = s->sps->min_pu_width;
1984     MvField *tab_mvf     = s->ref->tab_mvf;
1985     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1986     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1987     int j, k;
1988
1989     if (size_in_pus == 0)
1990         size_in_pus = 1;
1991     for (j = 0; j < size_in_pus; j++) {
1992         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1993         for (k = 0; k < size_in_pus; k++)
1994             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
1995     }
1996 }
1997
1998 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1999 {
2000     int cb_size          = 1 << log2_cb_size;
2001     HEVCLocalContext *lc = &s->HEVClc;
2002     int log2_min_cb_size = s->sps->log2_min_cb_size;
2003     int length           = cb_size >> log2_min_cb_size;
2004     int min_cb_width     = s->sps->min_cb_width;
2005     int x_cb             = x0 >> log2_min_cb_size;
2006     int y_cb             = y0 >> log2_min_cb_size;
2007     int x, y;
2008
2009     lc->cu.x                = x0;
2010     lc->cu.y                = y0;
2011     lc->cu.rqt_root_cbf     = 1;
2012     lc->cu.pred_mode        = MODE_INTRA;
2013     lc->cu.part_mode        = PART_2Nx2N;
2014     lc->cu.intra_split_flag = 0;
2015     lc->cu.pcm_flag         = 0;
2016
2017     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2018     for (x = 0; x < 4; x++)
2019         lc->pu.intra_pred_mode[x] = 1;
2020     if (s->pps->transquant_bypass_enable_flag) {
2021         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2022         if (lc->cu.cu_transquant_bypass_flag)
2023             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2024     } else
2025         lc->cu.cu_transquant_bypass_flag = 0;
2026
2027     if (s->sh.slice_type != I_SLICE) {
2028         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2029
2030         lc->cu.pred_mode = MODE_SKIP;
2031         x = y_cb * min_cb_width + x_cb;
2032         for (y = 0; y < length; y++) {
2033             memset(&s->skip_flag[x], skip_flag, length);
2034             x += min_cb_width;
2035         }
2036         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2037     }
2038
2039     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2040         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2041         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2042
2043         if (!s->sh.disable_deblocking_filter_flag)
2044             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2045                                                   lc->slice_or_tiles_up_boundary,
2046                                                   lc->slice_or_tiles_left_boundary);
2047     } else {
2048         if (s->sh.slice_type != I_SLICE)
2049             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2050         if (lc->cu.pred_mode != MODE_INTRA ||
2051             log2_cb_size == s->sps->log2_min_cb_size) {
2052             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2053             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2054                                       lc->cu.pred_mode == MODE_INTRA;
2055         }
2056
2057         if (lc->cu.pred_mode == MODE_INTRA) {
2058             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2059                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2060                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2061                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2062             }
2063             if (lc->cu.pcm_flag) {
2064                 int ret;
2065                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2066                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2067                 if (s->sps->pcm.loop_filter_disable_flag)
2068                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2069
2070                 if (ret < 0)
2071                     return ret;
2072             } else {
2073                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2074             }
2075         } else {
2076             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2077             switch (lc->cu.part_mode) {
2078             case PART_2Nx2N:
2079                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2080                 break;
2081             case PART_2NxN:
2082                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2083                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2084                 break;
2085             case PART_Nx2N:
2086                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2087                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2088                 break;
2089             case PART_2NxnU:
2090                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2091                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2092                 break;
2093             case PART_2NxnD:
2094                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2095                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2096                 break;
2097             case PART_nLx2N:
2098                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2099                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2100                 break;
2101             case PART_nRx2N:
2102                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2103                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2104                 break;
2105             case PART_NxN:
2106                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2107                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2108                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2109                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2110                 break;
2111             }
2112         }
2113
2114         if (!lc->cu.pcm_flag) {
2115             if (lc->cu.pred_mode != MODE_INTRA &&
2116                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2117                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2118             }
2119             if (lc->cu.rqt_root_cbf) {
2120                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2121                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2122                                          s->sps->max_transform_hierarchy_depth_inter;
2123                 hls_transform_tree(s, x0, y0, x0, y0, x0, y0, log2_cb_size,
2124                                    log2_cb_size, 0, 0);
2125             } else {
2126                 if (!s->sh.disable_deblocking_filter_flag)
2127                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2128                                                           lc->slice_or_tiles_up_boundary,
2129                                                           lc->slice_or_tiles_left_boundary);
2130             }
2131         }
2132     }
2133
2134     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2135         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2136
2137     x = y_cb * min_cb_width + x_cb;
2138     for (y = 0; y < length; y++) {
2139         memset(&s->qp_y_tab[x], lc->qp_y, length);
2140         x += min_cb_width;
2141     }
2142
2143     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2144
2145     return 0;
2146 }
2147
2148 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2149                                int log2_cb_size, int cb_depth)
2150 {
2151     HEVCLocalContext *lc = &s->HEVClc;
2152     const int cb_size    = 1 << log2_cb_size;
2153
2154     lc->ct.depth = cb_depth;
2155     if (x0 + cb_size <= s->sps->width  &&
2156         y0 + cb_size <= s->sps->height &&
2157         log2_cb_size > s->sps->log2_min_cb_size) {
2158         SAMPLE(s->split_cu_flag, x0, y0) =
2159             ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2160     } else {
2161         SAMPLE(s->split_cu_flag, x0, y0) =
2162             (log2_cb_size > s->sps->log2_min_cb_size);
2163     }
2164     if (s->pps->cu_qp_delta_enabled_flag &&
2165         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2166         lc->tu.is_cu_qp_delta_coded = 0;
2167         lc->tu.cu_qp_delta          = 0;
2168     }
2169
2170     if (SAMPLE(s->split_cu_flag, x0, y0)) {
2171         const int cb_size_split = cb_size >> 1;
2172         const int x1 = x0 + cb_size_split;
2173         const int y1 = y0 + cb_size_split;
2174
2175         log2_cb_size--;
2176         cb_depth++;
2177
2178 #define SUBDIVIDE(x, y)                                                \
2179 do {                                                                   \
2180     if (x < s->sps->width && y < s->sps->height) {                     \
2181         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2182         if (ret < 0)                                                   \
2183             return ret;                                                \
2184     }                                                                  \
2185 } while (0)
2186
2187         SUBDIVIDE(x0, y0);
2188         SUBDIVIDE(x1, y0);
2189         SUBDIVIDE(x0, y1);
2190         SUBDIVIDE(x1, y1);
2191     } else {
2192         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2193         if (ret < 0)
2194             return ret;
2195     }
2196
2197     return 0;
2198 }
2199
2200 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2201                                  int ctb_addr_ts)
2202 {
2203     HEVCLocalContext *lc  = &s->HEVClc;
2204     int ctb_size          = 1 << s->sps->log2_ctb_size;
2205     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2206     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2207
2208     int tile_left_boundary, tile_up_boundary;
2209     int slice_left_boundary, slice_up_boundary;
2210
2211     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2212
2213     if (s->pps->entropy_coding_sync_enabled_flag) {
2214         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2215             lc->first_qp_group = 1;
2216         lc->end_of_tiles_x = s->sps->width;
2217     } else if (s->pps->tiles_enabled_flag) {
2218         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2219             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2220             lc->start_of_tiles_x = x_ctb;
2221             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2222             lc->first_qp_group   = 1;
2223         }
2224     } else {
2225         lc->end_of_tiles_x = s->sps->width;
2226     }
2227
2228     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2229
2230     if (s->pps->tiles_enabled_flag) {
2231         tile_left_boundary  = x_ctb > 0 &&
2232                               s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
2233         slice_left_boundary = x_ctb > 0 &&
2234                               s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
2235         tile_up_boundary  = y_ctb > 0 &&
2236                             s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2237         slice_up_boundary = y_ctb > 0 &&
2238                             s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2239     } else {
2240         tile_left_boundary  =
2241         tile_up_boundary    = 1;
2242         slice_left_boundary = ctb_addr_in_slice > 0;
2243         slice_up_boundary   = ctb_addr_in_slice >= s->sps->ctb_width;
2244     }
2245     lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
2246     lc->slice_or_tiles_up_boundary   = (!slice_up_boundary + (!tile_up_boundary << 1));
2247     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
2248     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
2249     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2250     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2251 }
2252
2253 static int hls_slice_data(HEVCContext *s)
2254 {
2255     int ctb_size    = 1 << s->sps->log2_ctb_size;
2256     int more_data   = 1;
2257     int x_ctb       = 0;
2258     int y_ctb       = 0;
2259     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2260     int ret;
2261
2262     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2263         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2264
2265         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2266         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2267         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2268
2269         ff_hevc_cabac_init(s, ctb_addr_ts);
2270
2271         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2272
2273         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2274         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2275         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2276
2277         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2278         if (ret < 0)
2279             return ret;
2280         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2281
2282         ctb_addr_ts++;
2283         ff_hevc_save_states(s, ctb_addr_ts);
2284         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2285     }
2286
2287     if (x_ctb + ctb_size >= s->sps->width &&
2288         y_ctb + ctb_size >= s->sps->height)
2289         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2290
2291     return ctb_addr_ts;
2292 }
2293
2294 /**
2295  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2296  * 0 if the unit should be skipped, 1 otherwise
2297  */
2298 static int hls_nal_unit(HEVCContext *s)
2299 {
2300     GetBitContext *gb = &s->HEVClc.gb;
2301     int nuh_layer_id;
2302
2303     if (get_bits1(gb) != 0)
2304         return AVERROR_INVALIDDATA;
2305
2306     s->nal_unit_type = get_bits(gb, 6);
2307
2308     nuh_layer_id   = get_bits(gb, 6);
2309     s->temporal_id = get_bits(gb, 3) - 1;
2310     if (s->temporal_id < 0)
2311         return AVERROR_INVALIDDATA;
2312
2313     av_log(s->avctx, AV_LOG_DEBUG,
2314            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2315            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2316
2317     return nuh_layer_id == 0;
2318 }
2319
2320 static void restore_tqb_pixels(HEVCContext *s)
2321 {
2322     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2323     int x, y, c_idx;
2324
2325     for (c_idx = 0; c_idx < 3; c_idx++) {
2326         ptrdiff_t stride = s->frame->linesize[c_idx];
2327         int hshift       = s->sps->hshift[c_idx];
2328         int vshift       = s->sps->vshift[c_idx];
2329         for (y = 0; y < s->sps->min_pu_height; y++) {
2330             for (x = 0; x < s->sps->min_pu_width; x++) {
2331                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2332                     int n;
2333                     int len      = min_pu_size >> hshift;
2334                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2335                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2336                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2337                         memcpy(dst, src, len);
2338                         src += stride;
2339                         dst += stride;
2340                     }
2341                 }
2342             }
2343         }
2344     }
2345 }
2346
2347 static int hevc_frame_start(HEVCContext *s)
2348 {
2349     HEVCLocalContext *lc = &s->HEVClc;
2350     int ret;
2351
2352     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2353     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2354     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2355     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2356
2357     lc->start_of_tiles_x = 0;
2358     s->is_decoded        = 0;
2359
2360     if (s->pps->tiles_enabled_flag)
2361         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2362
2363     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2364                               s->poc);
2365     if (ret < 0)
2366         goto fail;
2367
2368     av_fast_malloc(&lc->edge_emu_buffer, &lc->edge_emu_buffer_size,
2369                    (MAX_PB_SIZE + 7) * s->ref->frame->linesize[0]);
2370     if (!lc->edge_emu_buffer) {
2371         ret = AVERROR(ENOMEM);
2372         goto fail;
2373     }
2374
2375     ret = ff_hevc_frame_rps(s);
2376     if (ret < 0) {
2377         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2378         goto fail;
2379     }
2380
2381     av_frame_unref(s->output_frame);
2382     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2383     if (ret < 0)
2384         goto fail;
2385
2386     ff_thread_finish_setup(s->avctx);
2387
2388     return 0;
2389
2390 fail:
2391     if (s->ref)
2392         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2393     s->ref = NULL;
2394     return ret;
2395 }
2396
2397 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2398 {
2399     HEVCLocalContext *lc = &s->HEVClc;
2400     GetBitContext *gb    = &lc->gb;
2401     int ctb_addr_ts, ret;
2402
2403     ret = init_get_bits8(gb, nal, length);
2404     if (ret < 0)
2405         return ret;
2406
2407     ret = hls_nal_unit(s);
2408     if (ret < 0) {
2409         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2410                s->nal_unit_type);
2411         if (s->avctx->err_recognition & AV_EF_EXPLODE)
2412             return ret;
2413         return 0;
2414     } else if (!ret)
2415         return 0;
2416
2417     switch (s->nal_unit_type) {
2418     case NAL_VPS:
2419         ret = ff_hevc_decode_nal_vps(s);
2420         if (ret < 0)
2421             return ret;
2422         break;
2423     case NAL_SPS:
2424         ret = ff_hevc_decode_nal_sps(s);
2425         if (ret < 0)
2426             return ret;
2427         break;
2428     case NAL_PPS:
2429         ret = ff_hevc_decode_nal_pps(s);
2430         if (ret < 0)
2431             return ret;
2432         break;
2433     case NAL_SEI_PREFIX:
2434     case NAL_SEI_SUFFIX:
2435         ret = ff_hevc_decode_nal_sei(s);
2436         if (ret < 0)
2437             return ret;
2438         break;
2439     case NAL_TRAIL_R:
2440     case NAL_TRAIL_N:
2441     case NAL_TSA_N:
2442     case NAL_TSA_R:
2443     case NAL_STSA_N:
2444     case NAL_STSA_R:
2445     case NAL_BLA_W_LP:
2446     case NAL_BLA_W_RADL:
2447     case NAL_BLA_N_LP:
2448     case NAL_IDR_W_RADL:
2449     case NAL_IDR_N_LP:
2450     case NAL_CRA_NUT:
2451     case NAL_RADL_N:
2452     case NAL_RADL_R:
2453     case NAL_RASL_N:
2454     case NAL_RASL_R:
2455         ret = hls_slice_header(s);
2456         if (ret < 0)
2457             return ret;
2458
2459         if (s->max_ra == INT_MAX) {
2460             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2461                 s->max_ra = s->poc;
2462             } else {
2463                 if (IS_IDR(s))
2464                     s->max_ra = INT_MIN;
2465             }
2466         }
2467
2468         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2469             s->poc <= s->max_ra) {
2470             s->is_decoded = 0;
2471             break;
2472         } else {
2473             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2474                 s->max_ra = INT_MIN;
2475         }
2476
2477         if (s->sh.first_slice_in_pic_flag) {
2478             ret = hevc_frame_start(s);
2479             if (ret < 0)
2480                 return ret;
2481         } else if (!s->ref) {
2482             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2483             return AVERROR_INVALIDDATA;
2484         }
2485
2486         if (!s->sh.dependent_slice_segment_flag &&
2487             s->sh.slice_type != I_SLICE) {
2488             ret = ff_hevc_slice_rpl(s);
2489             if (ret < 0) {
2490                 av_log(s->avctx, AV_LOG_WARNING,
2491                        "Error constructing the reference lists for the current slice.\n");
2492                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
2493                     return ret;
2494             }
2495         }
2496
2497         ctb_addr_ts = hls_slice_data(s);
2498         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2499             s->is_decoded = 1;
2500             if ((s->pps->transquant_bypass_enable_flag ||
2501                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2502                 s->sps->sao_enabled)
2503                 restore_tqb_pixels(s);
2504         }
2505
2506         if (ctb_addr_ts < 0)
2507             return ctb_addr_ts;
2508         break;
2509     case NAL_EOS_NUT:
2510     case NAL_EOB_NUT:
2511         s->seq_decode = (s->seq_decode + 1) & 0xff;
2512         s->max_ra     = INT_MAX;
2513         break;
2514     case NAL_AUD:
2515     case NAL_FD_NUT:
2516         break;
2517     default:
2518         av_log(s->avctx, AV_LOG_INFO,
2519                "Skipping NAL unit %d\n", s->nal_unit_type);
2520     }
2521
2522     return 0;
2523 }
2524
2525 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2526  * between these functions would be nice. */
2527 static int extract_rbsp(const uint8_t *src, int length,
2528                         HEVCNAL *nal)
2529 {
2530     int i, si, di;
2531     uint8_t *dst;
2532
2533 #define STARTCODE_TEST                                                  \
2534         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2535             if (src[i + 2] != 3) {                                      \
2536                 /* startcode, so we must be past the end */             \
2537                 length = i;                                             \
2538             }                                                           \
2539             break;                                                      \
2540         }
2541 #if HAVE_FAST_UNALIGNED
2542 #define FIND_FIRST_ZERO                                                 \
2543         if (i > 0 && !src[i])                                           \
2544             i--;                                                        \
2545         while (src[i])                                                  \
2546             i++
2547 #if HAVE_FAST_64BIT
2548     for (i = 0; i + 1 < length; i += 9) {
2549         if (!((~AV_RN64A(src + i) &
2550                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2551               0x8000800080008080ULL))
2552             continue;
2553         FIND_FIRST_ZERO;
2554         STARTCODE_TEST;
2555         i -= 7;
2556     }
2557 #else
2558     for (i = 0; i + 1 < length; i += 5) {
2559         if (!((~AV_RN32A(src + i) &
2560                (AV_RN32A(src + i) - 0x01000101U)) &
2561               0x80008080U))
2562             continue;
2563         FIND_FIRST_ZERO;
2564         STARTCODE_TEST;
2565         i -= 3;
2566     }
2567 #endif /* HAVE_FAST_64BIT */
2568 #else
2569     for (i = 0; i + 1 < length; i += 2) {
2570         if (src[i])
2571             continue;
2572         if (i > 0 && src[i - 1] == 0)
2573             i--;
2574         STARTCODE_TEST;
2575     }
2576 #endif /* HAVE_FAST_UNALIGNED */
2577
2578     if (i >= length - 1) { // no escaped 0
2579         nal->data = src;
2580         nal->size = length;
2581         return length;
2582     }
2583
2584     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2585                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2586     if (!nal->rbsp_buffer)
2587         return AVERROR(ENOMEM);
2588
2589     dst = nal->rbsp_buffer;
2590
2591     memcpy(dst, src, i);
2592     si = di = i;
2593     while (si + 2 < length) {
2594         // remove escapes (very rare 1:2^22)
2595         if (src[si + 2] > 3) {
2596             dst[di++] = src[si++];
2597             dst[di++] = src[si++];
2598         } else if (src[si] == 0 && src[si + 1] == 0) {
2599             if (src[si + 2] == 3) { // escape
2600                 dst[di++] = 0;
2601                 dst[di++] = 0;
2602                 si       += 3;
2603
2604                 continue;
2605             } else // next start code
2606                 goto nsc;
2607         }
2608
2609         dst[di++] = src[si++];
2610     }
2611     while (si < length)
2612         dst[di++] = src[si++];
2613
2614 nsc:
2615     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2616
2617     nal->data = dst;
2618     nal->size = di;
2619     return si;
2620 }
2621
2622 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2623 {
2624     int i, consumed, ret = 0;
2625
2626     s->ref = NULL;
2627     s->eos = 0;
2628
2629     /* split the input packet into NAL units, so we know the upper bound on the
2630      * number of slices in the frame */
2631     s->nb_nals = 0;
2632     while (length >= 4) {
2633         HEVCNAL *nal;
2634         int extract_length = 0;
2635
2636         if (s->is_nalff) {
2637             int i;
2638             for (i = 0; i < s->nal_length_size; i++)
2639                 extract_length = (extract_length << 8) | buf[i];
2640             buf    += s->nal_length_size;
2641             length -= s->nal_length_size;
2642
2643             if (extract_length > length) {
2644                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2645                 ret = AVERROR_INVALIDDATA;
2646                 goto fail;
2647             }
2648         } else {
2649             if (buf[2] == 0) {
2650                 length--;
2651                 buf++;
2652                 continue;
2653             }
2654             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2655                 ret = AVERROR_INVALIDDATA;
2656                 goto fail;
2657             }
2658
2659             buf           += 3;
2660             length        -= 3;
2661             extract_length = length;
2662         }
2663
2664         if (s->nals_allocated < s->nb_nals + 1) {
2665             int new_size = s->nals_allocated + 1;
2666             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2667             if (!tmp) {
2668                 ret = AVERROR(ENOMEM);
2669                 goto fail;
2670             }
2671             s->nals = tmp;
2672             memset(s->nals + s->nals_allocated, 0,
2673                    (new_size - s->nals_allocated) * sizeof(*tmp));
2674             s->nals_allocated = new_size;
2675         }
2676         nal = &s->nals[s->nb_nals++];
2677
2678         consumed = extract_rbsp(buf, extract_length, nal);
2679         if (consumed < 0) {
2680             ret = consumed;
2681             goto fail;
2682         }
2683
2684         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2685         if (ret < 0)
2686             goto fail;
2687         hls_nal_unit(s);
2688
2689         if (s->nal_unit_type == NAL_EOB_NUT ||
2690             s->nal_unit_type == NAL_EOS_NUT)
2691             s->eos = 1;
2692
2693         buf    += consumed;
2694         length -= consumed;
2695     }
2696
2697     /* parse the NAL units */
2698     for (i = 0; i < s->nb_nals; i++) {
2699         int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2700         if (ret < 0) {
2701             av_log(s->avctx, AV_LOG_WARNING,
2702                    "Error parsing NAL unit #%d.\n", i);
2703             if (s->avctx->err_recognition & AV_EF_EXPLODE)
2704                 goto fail;
2705         }
2706     }
2707
2708 fail:
2709     if (s->ref)
2710         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2711
2712     return ret;
2713 }
2714
/**
 * Log the 16 bytes of an MD5 digest as two-digit lowercase hex values,
 * without separators and without a trailing newline.
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end) {
        av_log(log_ctx, level, "%02"PRIx8, *byte);
        byte++;
    }
}
2721
2722 static int verify_md5(HEVCContext *s, AVFrame *frame)
2723 {
2724     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2725     int pixel_shift = desc->comp[0].depth_minus1 > 7;
2726     int i, j;
2727
2728     if (!desc)
2729         return AVERROR(EINVAL);
2730
2731     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2732            s->poc);
2733
2734     /* the checksums are LE, so we have to byteswap for >8bpp formats
2735      * on BE arches */
2736 #if HAVE_BIGENDIAN
2737     if (pixel_shift && !s->checksum_buf) {
2738         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2739                        FFMAX3(frame->linesize[0], frame->linesize[1],
2740                               frame->linesize[2]));
2741         if (!s->checksum_buf)
2742             return AVERROR(ENOMEM);
2743     }
2744 #endif
2745
2746     for (i = 0; frame->data[i]; i++) {
2747         int width  = s->avctx->coded_width;
2748         int height = s->avctx->coded_height;
2749         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2750         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2751         uint8_t md5[16];
2752
2753         av_md5_init(s->md5_ctx);
2754         for (j = 0; j < h; j++) {
2755             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2756 #if HAVE_BIGENDIAN
2757             if (pixel_shift) {
2758                 s->dsp.bswap16_buf((uint16_t*)s->checksum_buf,
2759                                    (const uint16_t*)src, w);
2760                 src = s->checksum_buf;
2761             }
2762 #endif
2763             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2764         }
2765         av_md5_final(s->md5_ctx, md5);
2766
2767         if (!memcmp(md5, s->md5[i], 16)) {
2768             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2769             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2770             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2771         } else {
2772             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2773             print_md5(s->avctx, AV_LOG_ERROR, md5);
2774             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2775             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2776             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2777             return AVERROR_INVALIDDATA;
2778         }
2779     }
2780
2781     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2782
2783     return 0;
2784 }
2785
2786 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2787                              AVPacket *avpkt)
2788 {
2789     int ret;
2790     HEVCContext *s = avctx->priv_data;
2791
2792     if (!avpkt->size) {
2793         ret = ff_hevc_output_frame(s, data, 1);
2794         if (ret < 0)
2795             return ret;
2796
2797         *got_output = ret;
2798         return 0;
2799     }
2800
2801     s->ref = NULL;
2802     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2803     if (ret < 0)
2804         return ret;
2805
2806     /* verify the SEI checksum */
2807     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2808         s->is_md5) {
2809         ret = verify_md5(s, s->ref->frame);
2810         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2811             ff_hevc_unref_frame(s, s->ref, ~0);
2812             return ret;
2813         }
2814     }
2815     s->is_md5 = 0;
2816
2817     if (s->is_decoded) {
2818         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2819         s->is_decoded = 0;
2820     }
2821
2822     if (s->output_frame->buf[0]) {
2823         av_frame_move_ref(data, s->output_frame);
2824         *got_output = 1;
2825     }
2826
2827     return avpkt->size;
2828 }
2829
2830 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2831 {
2832     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2833     if (ret < 0)
2834         return ret;
2835
2836     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2837     if (!dst->tab_mvf_buf)
2838         goto fail;
2839     dst->tab_mvf = src->tab_mvf;
2840
2841     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2842     if (!dst->rpl_tab_buf)
2843         goto fail;
2844     dst->rpl_tab = src->rpl_tab;
2845
2846     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2847     if (!dst->rpl_buf)
2848         goto fail;
2849
2850     dst->poc        = src->poc;
2851     dst->ctb_count  = src->ctb_count;
2852     dst->window     = src->window;
2853     dst->flags      = src->flags;
2854     dst->sequence   = src->sequence;
2855
2856     return 0;
2857 fail:
2858     ff_hevc_unref_frame(s, dst, ~0);
2859     return AVERROR(ENOMEM);
2860 }
2861
2862 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2863 {
2864     HEVCContext       *s = avctx->priv_data;
2865     HEVCLocalContext *lc = &s->HEVClc;
2866     int i;
2867
2868     pic_arrays_free(s);
2869
2870     av_freep(&lc->edge_emu_buffer);
2871     av_freep(&s->md5_ctx);
2872
2873     av_frame_free(&s->tmp_frame);
2874     av_frame_free(&s->output_frame);
2875
2876     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2877         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2878         av_frame_free(&s->DPB[i].frame);
2879     }
2880
2881     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
2882         av_freep(&s->vps_list[i]);
2883     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
2884         av_buffer_unref(&s->sps_list[i]);
2885     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
2886         av_buffer_unref(&s->pps_list[i]);
2887
2888     for (i = 0; i < s->nals_allocated; i++)
2889         av_freep(&s->nals[i].rbsp_buffer);
2890     av_freep(&s->nals);
2891     s->nals_allocated = 0;
2892
2893     return 0;
2894 }
2895
2896 static av_cold int hevc_init_context(AVCodecContext *avctx)
2897 {
2898     HEVCContext *s = avctx->priv_data;
2899     int i;
2900
2901     s->avctx = avctx;
2902
2903     s->tmp_frame = av_frame_alloc();
2904     if (!s->tmp_frame)
2905         goto fail;
2906
2907     s->output_frame = av_frame_alloc();
2908     if (!s->output_frame)
2909         goto fail;
2910
2911     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2912         s->DPB[i].frame = av_frame_alloc();
2913         if (!s->DPB[i].frame)
2914             goto fail;
2915         s->DPB[i].tf.f = s->DPB[i].frame;
2916     }
2917
2918     s->max_ra = INT_MAX;
2919
2920     s->md5_ctx = av_md5_alloc();
2921     if (!s->md5_ctx)
2922         goto fail;
2923
2924     ff_dsputil_init(&s->dsp, avctx);
2925
2926     s->context_initialized = 1;
2927
2928     return 0;
2929
2930 fail:
2931     hevc_decode_free(avctx);
2932     return AVERROR(ENOMEM);
2933 }
2934
2935 static int hevc_update_thread_context(AVCodecContext *dst,
2936                                       const AVCodecContext *src)
2937 {
2938     HEVCContext *s  = dst->priv_data;
2939     HEVCContext *s0 = src->priv_data;
2940     int i, ret;
2941
2942     if (!s->context_initialized) {
2943         ret = hevc_init_context(dst);
2944         if (ret < 0)
2945             return ret;
2946     }
2947
2948     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2949         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2950         if (s0->DPB[i].frame->buf[0]) {
2951             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
2952             if (ret < 0)
2953                 return ret;
2954         }
2955     }
2956
2957     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
2958         av_buffer_unref(&s->sps_list[i]);
2959         if (s0->sps_list[i]) {
2960             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
2961             if (!s->sps_list[i])
2962                 return AVERROR(ENOMEM);
2963         }
2964     }
2965
2966     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
2967         av_buffer_unref(&s->pps_list[i]);
2968         if (s0->pps_list[i]) {
2969             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
2970             if (!s->pps_list[i])
2971                 return AVERROR(ENOMEM);
2972         }
2973     }
2974
2975     if (s->sps != s0->sps)
2976         ret = set_sps(s, s0->sps);
2977
2978     s->seq_decode = s0->seq_decode;
2979     s->seq_output = s0->seq_output;
2980     s->pocTid0    = s0->pocTid0;
2981     s->max_ra     = s0->max_ra;
2982
2983     s->is_nalff        = s0->is_nalff;
2984     s->nal_length_size = s0->nal_length_size;
2985
2986     if (s0->eos) {
2987         s->seq_decode = (s->seq_decode + 1) & 0xff;
2988         s->max_ra = INT_MAX;
2989     }
2990
2991     return 0;
2992 }
2993
2994 static int hevc_decode_extradata(HEVCContext *s)
2995 {
2996     AVCodecContext *avctx = s->avctx;
2997     GetByteContext gb;
2998     int ret;
2999
3000     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3001
3002     if (avctx->extradata_size > 3 &&
3003         (avctx->extradata[0] || avctx->extradata[1] ||
3004          avctx->extradata[2] > 1)) {
3005         /* It seems the extradata is encoded as hvcC format.
3006          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3007          * is finalized. When finalized, configurationVersion will be 1 and we
3008          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3009         int i, j, num_arrays, nal_len_size;
3010
3011         s->is_nalff = 1;
3012
3013         bytestream2_skip(&gb, 21);
3014         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3015         num_arrays   = bytestream2_get_byte(&gb);
3016
3017         /* nal units in the hvcC always have length coded with 2 bytes,
3018          * so put a fake nal_length_size = 2 while parsing them */
3019         s->nal_length_size = 2;
3020
3021         /* Decode nal units from hvcC. */
3022         for (i = 0; i < num_arrays; i++) {
3023             int type = bytestream2_get_byte(&gb) & 0x3f;
3024             int cnt  = bytestream2_get_be16(&gb);
3025
3026             for (j = 0; j < cnt; j++) {
3027                 // +2 for the nal size field
3028                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3029                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3030                     av_log(s->avctx, AV_LOG_ERROR,
3031                            "Invalid NAL unit size in extradata.\n");
3032                     return AVERROR_INVALIDDATA;
3033                 }
3034
3035                 ret = decode_nal_units(s, gb.buffer, nalsize);
3036                 if (ret < 0) {
3037                     av_log(avctx, AV_LOG_ERROR,
3038                            "Decoding nal unit %d %d from hvcC failed\n",
3039                            type, i);
3040                     return ret;
3041                 }
3042                 bytestream2_skip(&gb, nalsize);
3043             }
3044         }
3045
3046         /* Now store right nal length size, that will be used to parse
3047          * all other nals */
3048         s->nal_length_size = nal_len_size;
3049     } else {
3050         s->is_nalff = 0;
3051         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3052         if (ret < 0)
3053             return ret;
3054     }
3055     return 0;
3056 }
3057
3058 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3059 {
3060     HEVCContext *s = avctx->priv_data;
3061     int ret;
3062
3063     ff_init_cabac_states();
3064
3065     avctx->internal->allocate_progress = 1;
3066
3067     ret = hevc_init_context(avctx);
3068     if (ret < 0)
3069         return ret;
3070
3071     if (avctx->extradata_size > 0 && avctx->extradata) {
3072         ret = hevc_decode_extradata(s);
3073         if (ret < 0) {
3074             hevc_decode_free(avctx);
3075             return ret;
3076         }
3077     }
3078
3079     return 0;
3080 }
3081
3082 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3083 {
3084     HEVCContext *s = avctx->priv_data;
3085     int ret;
3086
3087     memset(s, 0, sizeof(*s));
3088
3089     ret = hevc_init_context(avctx);
3090     if (ret < 0)
3091         return ret;
3092
3093     return 0;
3094 }
3095
3096 static void hevc_decode_flush(AVCodecContext *avctx)
3097 {
3098     HEVCContext *s = avctx->priv_data;
3099     ff_hevc_flush_dpb(s);
3100     s->max_ra = INT_MAX;
3101 }
3102
3103 #define OFFSET(x) offsetof(HEVCContext, x)
3104 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3105 static const AVOption options[] = {
3106     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3107         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3108     { NULL },
3109 };
3110
3111 static const AVClass hevc_decoder_class = {
3112     .class_name = "HEVC decoder",
3113     .item_name  = av_default_item_name,
3114     .option     = options,
3115     .version    = LIBAVUTIL_VERSION_INT,
3116 };
3117
3118 AVCodec ff_hevc_decoder = {
3119     .name                  = "hevc",
3120     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3121     .type                  = AVMEDIA_TYPE_VIDEO,
3122     .id                    = AV_CODEC_ID_HEVC,
3123     .priv_data_size        = sizeof(HEVCContext),
3124     .priv_class            = &hevc_decoder_class,
3125     .init                  = hevc_decode_init,
3126     .close                 = hevc_decode_free,
3127     .decode                = hevc_decode_frame,
3128     .flush                 = hevc_decode_flush,
3129     .update_thread_context = hevc_update_thread_context,
3130     .init_thread_copy      = hevc_init_thread_copy,
3131     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3132                              CODEC_CAP_FRAME_THREADS,
3133 };